Use external/local ROCm SDK (#5959)
diff --git a/build_tools/third_party/rocm/CMakeLists.txt b/build_tools/third_party/rocm/CMakeLists.txt
index 95ddb61..c2938cc 100644
--- a/build_tools/third_party/rocm/CMakeLists.txt
+++ b/build_tools/third_party/rocm/CMakeLists.txt
@@ -11,8 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+if(NOT IREE_BUILD_EXPERIMENTAL_ROCM)  # Bare name: if() dereferences it itself and treats unset/empty as false.
+  return()  # Nothing below is configured when the option is off.
+endif()
-set(ROCM_HEADERS_API_ROOT "${IREE_ROOT_DIR}/third_party/rocm/include")
+if(NOT ROCM_HEADERS_API_ROOT)  # Fall back to the default path only when no value was supplied.
+  set(ROCM_HEADERS_API_ROOT "/opt/rocm/include")
+endif()
+
+if(EXISTS "${ROCM_HEADERS_API_ROOT}")  # Quoted so a path with spaces or ';' stays a single argument.
+  message(STATUS "ROCm Header Path: ${ROCM_HEADERS_API_ROOT}")
+else()
+  message(SEND_ERROR "Could not locate ROCm: ${ROCM_HEADERS_API_ROOT}")
+endif()
external_cc_library(
PACKAGE
@@ -27,3 +38,4 @@
${ROCM_HEADERS_API_ROOT}
)
+unset(ROCM_HEADERS_API_ROOT)  # Clear the path variable once the library above has been declared.
diff --git a/third_party/rocm/LICENSE b/third_party/rocm/LICENSE
deleted file mode 100644
index 7c79cca..0000000
--- a/third_party/rocm/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-with the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimers.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimers in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the names of Advanced Micro Devices, Inc. nor the names of its
- contributors may be used to endorse or promote products derived from
- this Software without specific prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
-THE SOFTWARE.
\ No newline at end of file
diff --git a/third_party/rocm/README.txt b/third_party/rocm/README.txt
deleted file mode 100644
index 202619d..0000000
--- a/third_party/rocm/README.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-This folder contains a subset of ROCM SDK headers needed to build Experimental IREE ROCM Backend.
-It will also contains amdgcn bc files llvm module used to import __oc* function
-during ROCm HSACO(code object) kernel compilation.
diff --git a/third_party/rocm/UPDATING.md b/third_party/rocm/UPDATING.md
deleted file mode 100644
index 2cab28e..0000000
--- a/third_party/rocm/UPDATING.md
+++ /dev/null
@@ -1,15 +0,0 @@
-Those headers come from ROCM SDK.
-
-Currently updates are not supported by ROCm, so we need to uninstall and reinstall ROCm if we want to update
-To update, install ROCM SDK locally:
-```
-sudo apt autoremove rocm-opencl rocm-dkms rocm-dev rocm-utils && sudo reboot
-sudo apt-get install rocm-dkms
-```
-
-Copy HIP and HSA headers, version.txt and libdevice.10.bc:
-```
-cp -RL /opt/rocm/include/hip ./include/
-cp -RL /opt/rocm/include/hsa ./include/
-cp /opt/rocm/.info/version version.txt
-```
diff --git a/third_party/rocm/include/hip/channel_descriptor.h b/third_party/rocm/include/hip/channel_descriptor.h
deleted file mode 100644
index 842701b..0000000
--- a/third_party/rocm/include/hip/channel_descriptor.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H
-#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H
-
-// Some standard header files, these are included by hc.hpp and so want to make them avail on both
-// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
-// on NVCC path:
-
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/channel_descriptor.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/channel_descriptor.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/device_functions.h b/third_party/rocm/include/hip/device_functions.h
deleted file mode 100644
index f6059f2..0000000
--- a/third_party/rocm/include/hip/device_functions.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/device_functions.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <device_functions.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/driver_types.h b/third_party/rocm/include/hip/driver_types.h
deleted file mode 100644
index d428ec7..0000000
--- a/third_party/rocm/include/hip/driver_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H
-#define HIP_INCLUDE_HIP_DRIVER_TYPES_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/driver_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "driver_types.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/channel_descriptor.h b/third_party/rocm/include/hip/hcc_detail/channel_descriptor.h
deleted file mode 100644
index 417451f..0000000
--- a/third_party/rocm/include/hip/hcc_detail/channel_descriptor.h
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
-
-#include <hip/hip_common.h>
-#include <hip/hcc_detail/driver_types.h>
-#include <hip/hcc_detail/hip_vector_types.h>
-
-#ifdef __cplusplus
-
-#if __HIP_ROCclr__
-extern "C" {
-#endif
-HIP_PUBLIC_API
-hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f);
-#if __HIP_ROCclr__
-}
-#endif
-
-static inline hipChannelFormatDesc hipCreateChannelDescHalf() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-static inline hipChannelFormatDesc hipCreateChannelDescHalf1() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-static inline hipChannelFormatDesc hipCreateChannelDescHalf2() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-template <typename T>
-static inline hipChannelFormatDesc hipCreateChannelDesc() {
- return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char>() {
- int e = (int)sizeof(char) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed char>() {
- int e = (int)sizeof(signed char) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned char>() {
- int e = (int)sizeof(unsigned char) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar1>() {
- int e = (int)sizeof(unsigned char) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char1>() {
- int e = (int)sizeof(signed char) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar2>() {
- int e = (int)sizeof(unsigned char) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char2>() {
- int e = (int)sizeof(signed char) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__ // vector3 is the same as vector4
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar3>() {
- int e = (int)sizeof(unsigned char) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char3>() {
- int e = (int)sizeof(signed char) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar4>() {
- int e = (int)sizeof(unsigned char) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char4>() {
- int e = (int)sizeof(signed char) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned short>() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed short>() {
- int e = (int)sizeof(signed short) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort1>() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short1>() {
- int e = (int)sizeof(signed short) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort2>() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short2>() {
- int e = (int)sizeof(signed short) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort3>() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short3>() {
- int e = (int)sizeof(signed short) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort4>() {
- int e = (int)sizeof(unsigned short) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short4>() {
- int e = (int)sizeof(signed short) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned int>() {
- int e = (int)sizeof(unsigned int) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed int>() {
- int e = (int)sizeof(signed int) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint1>() {
- int e = (int)sizeof(unsigned int) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int1>() {
- int e = (int)sizeof(signed int) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint2>() {
- int e = (int)sizeof(unsigned int) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int2>() {
- int e = (int)sizeof(signed int) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint3>() {
- int e = (int)sizeof(unsigned int) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int3>() {
- int e = (int)sizeof(signed int) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint4>() {
- int e = (int)sizeof(unsigned int) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int4>() {
- int e = (int)sizeof(signed int) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float>() {
- int e = (int)sizeof(float) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float1>() {
- int e = (int)sizeof(float) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float2>() {
- int e = (int)sizeof(float) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float3>() {
- int e = (int)sizeof(float) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindFloat);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float4>() {
- int e = (int)sizeof(float) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned long>() {
- int e = (int)sizeof(unsigned long) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed long>() {
- int e = (int)sizeof(signed long) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong1>() {
- int e = (int)sizeof(unsigned long) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long1>() {
- int e = (int)sizeof(signed long) * 8;
- return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong2>() {
- int e = (int)sizeof(unsigned long) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long2>() {
- int e = (int)sizeof(signed long) * 8;
- return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong3>() {
- int e = (int)sizeof(unsigned long) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long3>() {
- int e = (int)sizeof(signed long) * 8;
- return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong4>() {
- int e = (int)sizeof(unsigned long) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long4>() {
- int e = (int)sizeof(signed long) * 8;
- return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-#else
-
-struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
- enum hipChannelFormatKind f);
-
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/concepts.hpp b/third_party/rocm/include/hip/hcc_detail/concepts.hpp
deleted file mode 100644
index 373cefb..0000000
--- a/third_party/rocm/include/hip/hcc_detail/concepts.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-namespace hip_impl // Documentation only.
-{
-#define requires(...)
-
-#define FunctionalProcedure typename
-} // namespace hip_impl
diff --git a/third_party/rocm/include/hip/hcc_detail/cuda/cuda.h b/third_party/rocm/include/hip/hcc_detail/cuda/cuda.h
deleted file mode 100644
index 8b13789..0000000
--- a/third_party/rocm/include/hip/hcc_detail/cuda/cuda.h
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/third_party/rocm/include/hip/hcc_detail/cuda/math_functions.h b/third_party/rocm/include/hip/hcc_detail/cuda/math_functions.h
deleted file mode 100644
index 8b13789..0000000
--- a/third_party/rocm/include/hip/hcc_detail/cuda/math_functions.h
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/third_party/rocm/include/hip/hcc_detail/device_functions.h b/third_party/rocm/include/hip/hcc_detail/device_functions.h
deleted file mode 100644
index 515b4cc..0000000
--- a/third_party/rocm/include/hip/hcc_detail/device_functions.h
+++ /dev/null
@@ -1,1431 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H
-
-#include "host_defines.h"
-#include "math_fwd.h"
-
-#include <hip/hip_runtime_api.h>
-#include <stddef.h>
-
-
-#include <hip/hip_vector_types.h>
-#include <hip/hcc_detail/device_library_decls.h>
-#include <hip/hcc_detail/llvm_intrinsics.h>
-
-#if __HIP_CLANG_ONLY__ && __HIP_ROCclr__ && !_WIN32
-extern "C" __device__ int printf(const char *fmt, ...);
-#else
-#if HC_FEATURE_PRINTF
-template <typename... All>
-static inline __device__ void printf(const char* format, All... all) {
- hc::printf(format, all...);
-}
-#else
-template <typename... All>
-static inline __device__ void printf(const char* format, All... all) {}
-#endif // HC_FEATURE_PRINTF
-#endif // __HIP_CLANG_ONLY__ && __HIP_ROCclr__
-
-/*
-Integer Intrinsics
-*/
-
-// integer intrinsic function __poc __clz __ffs __brev
-__device__ static inline unsigned int __popc(unsigned int input) {
- return __builtin_popcount(input);
-}
-__device__ static inline unsigned int __popcll(unsigned long long int input) {
- return __builtin_popcountll(input);
-}
-
-__device__ static inline int __clz(int input) {
- return __ockl_clz_u32((uint)input);
-}
-
-__device__ static inline int __clzll(long long int input) {
- return __ockl_clz_u64((ullong)input);
-}
-
-__device__ static inline unsigned int __ffs(unsigned int input) {
- return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1;
-}
-
-__device__ static inline unsigned int __ffsll(unsigned long long int input) {
- return ( input == 0 ? -1 : __builtin_ctzll(input) ) + 1;
-}
-
-__device__ static inline unsigned int __ffs(int input) {
- return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1;
-}
-
-__device__ static inline unsigned int __ffsll(long long int input) {
- return ( input == 0 ? -1 : __builtin_ctzll(input) ) + 1;
-}
-
-__device__ static inline unsigned int __brev(unsigned int input) {
- return __builtin_bitreverse32(input);
-}
-
-__device__ static inline unsigned long long int __brevll(unsigned long long int input) {
- return __builtin_bitreverse64(input);
-}
-
-__device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) {
- return input == 0 ? -1 : __builtin_ctzl(input);
-}
-
-__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) {
- uint32_t offset = src1 & 31;
- uint32_t width = src2 & 31;
- return width == 0 ? 0 : (src0 << (32 - offset - width)) >> (32 - width);
-}
-
-__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) {
- uint64_t offset = src1 & 63;
- uint64_t width = src2 & 63;
- return width == 0 ? 0 : (src0 << (64 - offset - width)) >> (64 - width);
-}
-
-__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) {
- uint32_t offset = src2 & 31;
- uint32_t width = src3 & 31;
- uint32_t mask = (1 << width) - 1;
- return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
-}
-
-__device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) {
- uint64_t offset = src2 & 63;
- uint64_t width = src3 & 63;
- uint64_t mask = (1ULL << width) - 1;
- return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
-}
-
-__device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s);
-__device__ static unsigned int __hadd(int x, int y);
-__device__ static int __mul24(int x, int y);
-__device__ static long long int __mul64hi(long long int x, long long int y);
-__device__ static int __mulhi(int x, int y);
-__device__ static int __rhadd(int x, int y);
-__device__ static unsigned int __sad(int x, int y,unsigned int z);
-__device__ static unsigned int __uhadd(unsigned int x, unsigned int y);
-__device__ static int __umul24(unsigned int x, unsigned int y);
-__device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y);
-__device__ static unsigned int __umulhi(unsigned int x, unsigned int y);
-__device__ static unsigned int __urhadd(unsigned int x, unsigned int y);
-__device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z);
-
-struct ucharHolder {
- union {
- unsigned char c[4];
- unsigned int ui;
- };
-} __attribute__((aligned(4)));
-
-struct uchar2Holder {
- union {
- unsigned int ui[2];
- unsigned char c[8];
- };
-} __attribute__((aligned(8)));
-
-__device__
-static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) {
- struct uchar2Holder cHoldVal;
- struct ucharHolder cHoldKey;
- struct ucharHolder cHoldOut;
- cHoldKey.ui = s;
- cHoldVal.ui[0] = x;
- cHoldVal.ui[1] = y;
- cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]];
- cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]];
- cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]];
- cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]];
- return cHoldOut.ui;
-}
-
-__device__ static inline unsigned int __hadd(int x, int y) {
- int z = x + y;
- int sign = z & 0x8000000;
- int value = z & 0x7FFFFFFF;
- return ((value) >> 1 || sign);
-}
-
-__device__ static inline int __mul24(int x, int y) {
- return __ockl_mul24_i32(x, y);
-}
-
-__device__ static inline long long __mul64hi(long long int x, long long int y) {
- ulong x0 = (ulong)x & 0xffffffffUL;
- long x1 = x >> 32;
- ulong y0 = (ulong)y & 0xffffffffUL;
- long y1 = y >> 32;
- ulong z0 = x0*y0;
- long t = x1*y0 + (z0 >> 32);
- long z1 = t & 0xffffffffL;
- long z2 = t >> 32;
- z1 = x0*y1 + z1;
- return x1*y1 + z2 + (z1 >> 32);
-}
-
-__device__ static inline int __mulhi(int x, int y) {
- return __ockl_mul_hi_i32(x, y);
-}
-
-__device__ static inline int __rhadd(int x, int y) {
- int z = x + y + 1;
- int sign = z & 0x8000000;
- int value = z & 0x7FFFFFFF;
- return ((value) >> 1 || sign);
-}
-__device__ static inline unsigned int __sad(int x, int y, unsigned int z) {
- return x > y ? x - y + z : y - x + z;
-}
-__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) {
- return (x + y) >> 1;
-}
-__device__ static inline int __umul24(unsigned int x, unsigned int y) {
- return __ockl_mul24_u32(x, y);
-}
-
-__device__
-static inline unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) {
- ulong x0 = x & 0xffffffffUL;
- ulong x1 = x >> 32;
- ulong y0 = y & 0xffffffffUL;
- ulong y1 = y >> 32;
- ulong z0 = x0*y0;
- ulong t = x1*y0 + (z0 >> 32);
- ulong z1 = t & 0xffffffffUL;
- ulong z2 = t >> 32;
- z1 = x0*y1 + z1;
- return x1*y1 + z2 + (z1 >> 32);
-}
-
-__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) {
- return __ockl_mul_hi_u32(x, y);
-}
-__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) {
- return (x + y + 1) >> 1;
-}
-__device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) {
- return __ockl_sadd_u32(x, y, z);
-}
-
-__device__ static inline unsigned int __lane_id() {
- return __builtin_amdgcn_mbcnt_hi(
- -1, __builtin_amdgcn_mbcnt_lo(-1, 0));
-}
-
-__device__
-static inline unsigned int __mbcnt_lo(unsigned int x, unsigned int y) {return __builtin_amdgcn_mbcnt_lo(x,y);};
-
-__device__
-static inline unsigned int __mbcnt_hi(unsigned int x, unsigned int y) {return __builtin_amdgcn_mbcnt_hi(x,y);};
-
-/*
-HIP specific device functions
-*/
-
-__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
- union { int i; unsigned u; float f; } tmp; tmp.u = src;
- tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
- return tmp.u;
-}
-
-__device__ static inline float __hip_ds_bpermutef(int index, float src) {
- union { int i; unsigned u; float f; } tmp; tmp.f = src;
- tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
- return tmp.f;
-}
-
-__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
- union { int i; unsigned u; float f; } tmp; tmp.u = src;
- tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
- return tmp.u;
-}
-
-__device__ static inline float __hip_ds_permutef(int index, float src) {
- union { int i; unsigned u; float f; } tmp; tmp.u = src;
- tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
- return tmp.u;
-}
-
-#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src))
-#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
-
-template <int pattern>
-__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
- union { int i; unsigned u; float f; } tmp; tmp.u = src;
-#if defined(__HCC__)
- tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
-#else
- tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
-#endif
- return tmp.u;
-}
-
-template <int pattern>
-__device__ static inline float __hip_ds_swizzlef_N(float src) {
- union { int i; unsigned u; float f; } tmp; tmp.f = src;
-#if defined(__HCC__)
- tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
-#else
- tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
-#endif
- return tmp.f;
-}
-
-#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
- __hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
-
-template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
-__device__ static inline int __hip_move_dpp_N(int src) {
- return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask,
- bound_ctrl);
-}
-
-// FIXME: Remove the following workaround once the clang change is released.
-// This is for backward compatibility with older clang which does not define
-// __AMDGCN_WAVEFRONT_SIZE. It does not consider -mwavefrontsize64.
-#ifndef __AMDGCN_WAVEFRONT_SIZE
-#if __gfx1010__ || __gfx1011__ || __gfx1012__ || __gfx1030__ || __gfx1031__
-#define __AMDGCN_WAVEFRONT_SIZE 32
-#else
-#define __AMDGCN_WAVEFRONT_SIZE 64
-#endif
-#endif
-static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
-
-__device__
-inline
-int __shfl(int var, int src_lane, int width = warpSize) {
- int self = __lane_id();
- int index = src_lane + (self & ~(width-1));
- return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.u = var;
- tmp.i = __shfl(tmp.i, src_lane, width);
- return tmp.u;
-}
-__device__
-inline
-float __shfl(float var, int src_lane, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.f = var;
- tmp.i = __shfl(tmp.i, src_lane, width);
- return tmp.f;
-}
-__device__
-inline
-double __shfl(double var, int src_lane, int width = warpSize) {
- static_assert(sizeof(double) == 2 * sizeof(int), "");
- static_assert(sizeof(double) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl(tmp[0], src_lane, width);
- tmp[1] = __shfl(tmp[1], src_lane, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-__device__
-inline
-long __shfl(long var, int src_lane, int width = warpSize)
-{
- #ifndef _MSC_VER
- static_assert(sizeof(long) == 2 * sizeof(int), "");
- static_assert(sizeof(long) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl(tmp[0], src_lane, width);
- tmp[1] = __shfl(tmp[1], src_lane, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(long) == sizeof(int), "");
- return static_cast<long>(__shfl(static_cast<int>(var), src_lane, width));
- #endif
-}
-__device__
-inline
-unsigned long __shfl(unsigned long var, int src_lane, int width = warpSize) {
- #ifndef _MSC_VER
- static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl(tmp[0], src_lane, width);
- tmp[1] = __shfl(tmp[1], src_lane, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
- return static_cast<unsigned long>(__shfl(static_cast<unsigned int>(var), src_lane, width));
- #endif
-}
-__device__
-inline
-long long __shfl(long long var, int src_lane, int width = warpSize)
-{
- static_assert(sizeof(long long) == 2 * sizeof(int), "");
- static_assert(sizeof(long long) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl(tmp[0], src_lane, width);
- tmp[1] = __shfl(tmp[1], src_lane, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-__device__
-inline
-unsigned long long __shfl(unsigned long long var, int src_lane, int width = warpSize) {
- static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl(tmp[0], src_lane, width);
- tmp[1] = __shfl(tmp[1], src_lane, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-
-__device__
-inline
-int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) {
- int self = __lane_id();
- int index = self - lane_delta;
- index = (index < (self & ~(width-1)))?self:index;
- return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.u = var;
- tmp.i = __shfl_up(tmp.i, lane_delta, width);
- return tmp.u;
-}
-__device__
-inline
-float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.f = var;
- tmp.i = __shfl_up(tmp.i, lane_delta, width);
- return tmp.f;
-}
-__device__
-inline
-double __shfl_up(double var, unsigned int lane_delta, int width = warpSize) {
- static_assert(sizeof(double) == 2 * sizeof(int), "");
- static_assert(sizeof(double) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_up(tmp[0], lane_delta, width);
- tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-__device__
-inline
-long __shfl_up(long var, unsigned int lane_delta, int width = warpSize)
-{
- #ifndef _MSC_VER
- static_assert(sizeof(long) == 2 * sizeof(int), "");
- static_assert(sizeof(long) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_up(tmp[0], lane_delta, width);
- tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(long) == sizeof(int), "");
- return static_cast<long>(__shfl_up(static_cast<int>(var), lane_delta, width));
- #endif
-}
-
-__device__
-inline
-unsigned long __shfl_up(unsigned long var, unsigned int lane_delta, int width = warpSize)
-{
- #ifndef _MSC_VER
- static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_up(tmp[0], lane_delta, width);
- tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
- return static_cast<unsigned long>(__shfl_up(static_cast<unsigned int>(var), lane_delta, width));
- #endif
-}
-
-__device__
-inline
-long long __shfl_up(long long var, unsigned int lane_delta, int width = warpSize)
-{
- static_assert(sizeof(long long) == 2 * sizeof(int), "");
- static_assert(sizeof(long long) == sizeof(uint64_t), "");
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_up(tmp[0], lane_delta, width);
- tmp[1] = __shfl_up(tmp[1], lane_delta, width);
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-
-__device__
-inline
-unsigned long long __shfl_up(unsigned long long var, unsigned int lane_delta, int width = warpSize)
-{
- static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_up(tmp[0], lane_delta, width);
- tmp[1] = __shfl_up(tmp[1], lane_delta, width);
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-
-__device__
-inline
-int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) {
- int self = __lane_id();
- int index = self + lane_delta;
- index = (int)((self&(width-1))+lane_delta) >= width?self:index;
- return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.u = var;
- tmp.i = __shfl_down(tmp.i, lane_delta, width);
- return tmp.u;
-}
-__device__
-inline
-float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.f = var;
- tmp.i = __shfl_down(tmp.i, lane_delta, width);
- return tmp.f;
-}
-__device__
-inline
-double __shfl_down(double var, unsigned int lane_delta, int width = warpSize) {
- static_assert(sizeof(double) == 2 * sizeof(int), "");
- static_assert(sizeof(double) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_down(tmp[0], lane_delta, width);
- tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-__device__
-inline
-long __shfl_down(long var, unsigned int lane_delta, int width = warpSize)
-{
- #ifndef _MSC_VER
- static_assert(sizeof(long) == 2 * sizeof(int), "");
- static_assert(sizeof(long) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_down(tmp[0], lane_delta, width);
- tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(long) == sizeof(int), "");
- return static_cast<long>(__shfl_down(static_cast<int>(var), lane_delta, width));
- #endif
-}
-__device__
-inline
-unsigned long __shfl_down(unsigned long var, unsigned int lane_delta, int width = warpSize)
-{
- #ifndef _MSC_VER
- static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_down(tmp[0], lane_delta, width);
- tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
- return static_cast<unsigned long>(__shfl_down(static_cast<unsigned int>(var), lane_delta, width));
- #endif
-}
-__device__
-inline
-long long __shfl_down(long long var, unsigned int lane_delta, int width = warpSize)
-{
- static_assert(sizeof(long long) == 2 * sizeof(int), "");
- static_assert(sizeof(long long) == sizeof(uint64_t), "");
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_down(tmp[0], lane_delta, width);
- tmp[1] = __shfl_down(tmp[1], lane_delta, width);
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-__device__
-inline
-unsigned long long __shfl_down(unsigned long long var, unsigned int lane_delta, int width = warpSize)
-{
- static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_down(tmp[0], lane_delta, width);
- tmp[1] = __shfl_down(tmp[1], lane_delta, width);
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-
-__device__
-inline
-int __shfl_xor(int var, int lane_mask, int width = warpSize) {
- int self = __lane_id();
- int index = self^lane_mask;
- index = index >= ((self+width)&~(width-1))?self:index;
- return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.u = var;
- tmp.i = __shfl_xor(tmp.i, lane_mask, width);
- return tmp.u;
-}
-__device__
-inline
-float __shfl_xor(float var, int lane_mask, int width = warpSize) {
- union { int i; unsigned u; float f; } tmp; tmp.f = var;
- tmp.i = __shfl_xor(tmp.i, lane_mask, width);
- return tmp.f;
-}
-__device__
-inline
-double __shfl_xor(double var, int lane_mask, int width = warpSize) {
- static_assert(sizeof(double) == 2 * sizeof(int), "");
- static_assert(sizeof(double) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
- tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-__device__
-inline
-long __shfl_xor(long var, int lane_mask, int width = warpSize)
-{
- #ifndef _MSC_VER
- static_assert(sizeof(long) == 2 * sizeof(int), "");
- static_assert(sizeof(long) == sizeof(uint64_t), "");
-
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
- tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(long) == sizeof(int), "");
- return static_cast<long>(__shfl_xor(static_cast<int>(var), lane_mask, width));
- #endif
-}
-__device__
-inline
-unsigned long __shfl_xor(unsigned long var, int lane_mask, int width = warpSize)
-{
- #ifndef _MSC_VER
- static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
- tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
- #else
- static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
- return static_cast<unsigned long>(__shfl_xor(static_cast<unsigned int>(var), lane_mask, width));
- #endif
-}
-__device__
-inline
-long long __shfl_xor(long long var, int lane_mask, int width = warpSize)
-{
- static_assert(sizeof(long long) == 2 * sizeof(int), "");
- static_assert(sizeof(long long) == sizeof(uint64_t), "");
- int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
- tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-__device__
-inline
-unsigned long long __shfl_xor(unsigned long long var, int lane_mask, int width = warpSize)
-{
- static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
- unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
- tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
- tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
- uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
- unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
- return tmp1;
-}
-#define MASK1 0x00ff00ff
-#define MASK2 0xff00ff00
-
-__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) {
- char4 out;
- unsigned one1 = in1.w & MASK1;
- unsigned one2 = in2.w & MASK1;
- out.w = (one1 + one2) & MASK1;
- one1 = in1.w & MASK2;
- one2 = in2.w & MASK2;
- out.w = out.w | ((one1 + one2) & MASK2);
- return out;
-}
-
-__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
- char4 out;
- unsigned one1 = in1.w & MASK1;
- unsigned one2 = in2.w & MASK1;
- out.w = (one1 - one2) & MASK1;
- one1 = in1.w & MASK2;
- one2 = in2.w & MASK2;
- out.w = out.w | ((one1 - one2) & MASK2);
- return out;
-}
-
-__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
- char4 out;
- unsigned one1 = in1.w & MASK1;
- unsigned one2 = in2.w & MASK1;
- out.w = (one1 * one2) & MASK1;
- one1 = in1.w & MASK2;
- one2 = in2.w & MASK2;
- out.w = out.w | ((one1 * one2) & MASK2);
- return out;
-}
-
-/*
- * Rounding modes are not yet supported in HIP
- * TODO: Conversion functions are not correct, need to fix when BE is ready
-*/
-
-__device__ static inline float __double2float_rd(double x) { return (double)x; }
-__device__ static inline float __double2float_rn(double x) { return (double)x; }
-__device__ static inline float __double2float_ru(double x) { return (double)x; }
-__device__ static inline float __double2float_rz(double x) { return (double)x; }
-
-__device__ static inline int __double2hiint(double x) {
- static_assert(sizeof(double) == 2 * sizeof(int), "");
-
- int tmp[2];
- __builtin_memcpy(tmp, &x, sizeof(tmp));
-
- return tmp[1];
-}
-__device__ static inline int __double2loint(double x) {
- static_assert(sizeof(double) == 2 * sizeof(int), "");
-
- int tmp[2];
- __builtin_memcpy(tmp, &x, sizeof(tmp));
-
- return tmp[0];
-}
-
-__device__ static inline int __double2int_rd(double x) { return (int)x; }
-__device__ static inline int __double2int_rn(double x) { return (int)x; }
-__device__ static inline int __double2int_ru(double x) { return (int)x; }
-__device__ static inline int __double2int_rz(double x) { return (int)x; }
-
-__device__ static inline long long int __double2ll_rd(double x) { return (long long int)x; }
-__device__ static inline long long int __double2ll_rn(double x) { return (long long int)x; }
-__device__ static inline long long int __double2ll_ru(double x) { return (long long int)x; }
-__device__ static inline long long int __double2ll_rz(double x) { return (long long int)x; }
-
-__device__ static inline unsigned int __double2uint_rd(double x) { return (unsigned int)x; }
-__device__ static inline unsigned int __double2uint_rn(double x) { return (unsigned int)x; }
-__device__ static inline unsigned int __double2uint_ru(double x) { return (unsigned int)x; }
-__device__ static inline unsigned int __double2uint_rz(double x) { return (unsigned int)x; }
-
-__device__ static inline unsigned long long int __double2ull_rd(double x) {
- return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __double2ull_rn(double x) {
- return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __double2ull_ru(double x) {
- return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __double2ull_rz(double x) {
- return (unsigned long long int)x;
-}
-
-__device__ static inline long long int __double_as_longlong(double x) {
- static_assert(sizeof(long long) == sizeof(double), "");
-
- long long tmp;
- __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
- return tmp;
-}
-
-/*
-__device__ unsigned short __float2half_rn(float x);
-__device__ float __half2float(unsigned short);
-
-The above device function are not a valid .
-Use
-__device__ __half __float2half_rn(float x);
-__device__ float __half2float(__half);
-from hip_fp16.h
-
-CUDA implements half as unsigned short whereas, HIP doesn't.
-
-*/
-
-__device__ static inline int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); }
-__device__ static inline int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); }
-__device__ static inline int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); }
-__device__ static inline int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); }
-
-__device__ static inline long long int __float2ll_rd(float x) { return (long long int)x; }
-__device__ static inline long long int __float2ll_rn(float x) { return (long long int)x; }
-__device__ static inline long long int __float2ll_ru(float x) { return (long long int)x; }
-__device__ static inline long long int __float2ll_rz(float x) { return (long long int)x; }
-
-__device__ static inline unsigned int __float2uint_rd(float x) { return (unsigned int)x; }
-__device__ static inline unsigned int __float2uint_rn(float x) { return (unsigned int)x; }
-__device__ static inline unsigned int __float2uint_ru(float x) { return (unsigned int)x; }
-__device__ static inline unsigned int __float2uint_rz(float x) { return (unsigned int)x; }
-
-__device__ static inline unsigned long long int __float2ull_rd(float x) {
- return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __float2ull_rn(float x) {
- return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __float2ull_ru(float x) {
- return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __float2ull_rz(float x) {
- return (unsigned long long int)x;
-}
-
-__device__ static inline int __float_as_int(float x) {
- static_assert(sizeof(int) == sizeof(float), "");
-
- int tmp;
- __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
- return tmp;
-}
-
-__device__ static inline unsigned int __float_as_uint(float x) {
- static_assert(sizeof(unsigned int) == sizeof(float), "");
-
- unsigned int tmp;
- __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
- return tmp;
-}
-
-__device__ static inline double __hiloint2double(int hi, int lo) {
- static_assert(sizeof(double) == sizeof(uint64_t), "");
-
- uint64_t tmp0 = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
- double tmp1;
- __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-
- return tmp1;
-}
-
-__device__ static inline double __int2double_rn(int x) { return (double)x; }
-
-__device__ static inline float __int2float_rd(int x) { return (float)x; }
-__device__ static inline float __int2float_rn(int x) { return (float)x; }
-__device__ static inline float __int2float_ru(int x) { return (float)x; }
-__device__ static inline float __int2float_rz(int x) { return (float)x; }
-
-__device__ static inline float __int_as_float(int x) {
- static_assert(sizeof(float) == sizeof(int), "");
-
- float tmp;
- __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
- return tmp;
-}
-
-__device__ static inline double __ll2double_rd(long long int x) { return (double)x; }
-__device__ static inline double __ll2double_rn(long long int x) { return (double)x; }
-__device__ static inline double __ll2double_ru(long long int x) { return (double)x; }
-__device__ static inline double __ll2double_rz(long long int x) { return (double)x; }
-
-__device__ static inline float __ll2float_rd(long long int x) { return (float)x; }
-__device__ static inline float __ll2float_rn(long long int x) { return (float)x; }
-__device__ static inline float __ll2float_ru(long long int x) { return (float)x; }
-__device__ static inline float __ll2float_rz(long long int x) { return (float)x; }
-
-__device__ static inline double __longlong_as_double(long long int x) {
- static_assert(sizeof(double) == sizeof(long long), "");
-
- double tmp;
- __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
- return tmp;
-}
-
-__device__ static inline double __uint2double_rn(int x) { return (double)x; }
-
-__device__ static inline float __uint2float_rd(unsigned int x) { return (float)x; }
-__device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; }
-__device__ static inline float __uint2float_ru(unsigned int x) { return (float)x; }
-__device__ static inline float __uint2float_rz(unsigned int x) { return (float)x; }
-
-__device__ static inline float __uint_as_float(unsigned int x) {
- static_assert(sizeof(float) == sizeof(unsigned int), "");
-
- float tmp;
- __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
- return tmp;
-}
-
-__device__ static inline double __ull2double_rd(unsigned long long int x) { return (double)x; }
-__device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; }
-__device__ static inline double __ull2double_ru(unsigned long long int x) { return (double)x; }
-__device__ static inline double __ull2double_rz(unsigned long long int x) { return (double)x; }
-
-__device__ static inline float __ull2float_rd(unsigned long long int x) { return (float)x; }
-__device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; }
-__device__ static inline float __ull2float_ru(unsigned long long int x) { return (float)x; }
-__device__ static inline float __ull2float_rz(unsigned long long int x) { return (float)x; }
-
-#if defined(__HCC__)
-#define __HCC_OR_HIP_CLANG__ 1
-#elif defined(__clang__) && defined(__HIP__)
-#define __HCC_OR_HIP_CLANG__ 1
-#else
-#define __HCC_OR_HIP_CLANG__ 0
-#endif
-
-#if __HCC_OR_HIP_CLANG__
-
-// Clock functions
-__device__ long long int __clock64();
-__device__ long long int __clock();
-__device__ long long int clock64();
-__device__ long long int clock();
-// hip.amdgcn.bc - named sync
-__device__ void __named_sync(int a, int b);
-
-#ifdef __HIP_DEVICE_COMPILE__
-
-// Clock functions
-#if __HCC__
-extern "C" uint64_t __clock_u64() __HC__;
-#endif
-
-__device__
-inline __attribute((always_inline))
-long long int __clock64() {
-return (long long int) __builtin_readcyclecounter();
-}
-
-__device__
-inline __attribute((always_inline))
-long long int __clock() { return __clock64(); }
-
-__device__
-inline __attribute__((always_inline))
-long long int clock64() { return __clock64(); }
-
-__device__
-inline __attribute__((always_inline))
-long long int clock() { return __clock(); }
-
-// hip.amdgcn.bc - named sync
-__device__
-inline
-void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); }
-
-#endif // __HIP_DEVICE_COMPILE__
-
-// warp vote function __all __any __ballot
-__device__
-inline
-int __all(int predicate) {
- return __ockl_wfall_i32(predicate);
-}
-
-__device__
-inline
-int __any(int predicate) {
- return __ockl_wfany_i32(predicate);
-}
-
-// XXX from llvm/include/llvm/IR/InstrTypes.h
-#define ICMP_NE 33
-
-__device__
-inline
-unsigned long long int __ballot(int predicate) {
- return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
-}
-
-__device__
-inline
-unsigned long long int __ballot64(int predicate) {
- return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
-}
-
-// hip.amdgcn.bc - lanemask
-__device__
-inline
-uint64_t __lanemask_gt()
-{
- uint32_t lane = __ockl_lane_u32();
- if (lane == 63)
- return 0;
- uint64_t ballot = __ballot64(1);
- uint64_t mask = (~((uint64_t)0)) << (lane + 1);
- return mask & ballot;
-}
-
-__device__
-inline
-uint64_t __lanemask_lt()
-{
- uint32_t lane = __ockl_lane_u32();
- int64_t ballot = __ballot64(1);
- uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1;
- return mask & ballot;
-}
-
-__device__
-inline
-uint64_t __lanemask_eq()
-{
- uint32_t lane = __ockl_lane_u32();
- int64_t mask = ((uint64_t)1 << lane);
- return mask;
-}
-
-
-__device__ inline void* __local_to_generic(void* p) { return p; }
-
-#ifdef __HIP_DEVICE_COMPILE__
-__device__
-inline
-void* __get_dynamicgroupbaseptr()
-{
- // Get group segment base pointer.
- return (char*)__local_to_generic((void*)__to_local(__llvm_amdgcn_groupstaticsize()));
-}
-#else
-__device__
-void* __get_dynamicgroupbaseptr();
-#endif // __HIP_DEVICE_COMPILE__
-
-__device__
-inline
-void *__amdgcn_get_dynamicgroupbaseptr() {
- return __get_dynamicgroupbaseptr();
-}
-
-#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
-// hip.amdgcn.bc - sync threads
-#define __CLK_LOCAL_MEM_FENCE 0x01
-typedef unsigned __cl_mem_fence_flags;
-
-typedef enum __memory_scope {
- __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
- __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
- __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
- __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
- __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
-} __memory_scope;
-
-// enum values aligned with what clang uses in EmitAtomicExpr()
-typedef enum __memory_order
-{
- __memory_order_relaxed = __ATOMIC_RELAXED,
- __memory_order_acquire = __ATOMIC_ACQUIRE,
- __memory_order_release = __ATOMIC_RELEASE,
- __memory_order_acq_rel = __ATOMIC_ACQ_REL,
- __memory_order_seq_cst = __ATOMIC_SEQ_CST
-} __memory_order;
-
-__device__
-inline
-static void
-__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
-{
- // We're tying global-happens-before and local-happens-before together as does HSA
- if (order != __memory_order_relaxed) {
- switch (scope) {
- case __memory_scope_work_item:
- break;
- case __memory_scope_sub_group:
- switch (order) {
- case __memory_order_relaxed: break;
- case __memory_order_acquire: __llvm_fence_acq_sg(); break;
- case __memory_order_release: __llvm_fence_rel_sg(); break;
- case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
- case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
- }
- break;
- case __memory_scope_work_group:
- switch (order) {
- case __memory_order_relaxed: break;
- case __memory_order_acquire: __llvm_fence_acq_wg(); break;
- case __memory_order_release: __llvm_fence_rel_wg(); break;
- case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
- case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
- }
- break;
- case __memory_scope_device:
- switch (order) {
- case __memory_order_relaxed: break;
- case __memory_order_acquire: __llvm_fence_acq_dev(); break;
- case __memory_order_release: __llvm_fence_rel_dev(); break;
- case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
- case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
- }
- break;
- case __memory_scope_all_svm_devices:
- switch (order) {
- case __memory_order_relaxed: break;
- case __memory_order_acquire: __llvm_fence_acq_sys(); break;
- case __memory_order_release: __llvm_fence_rel_sys(); break;
- case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
- case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
- }
- break;
- }
- }
-}
-#endif
-
-// Memory Fence Functions
-__device__
-inline
-static void __threadfence()
-{
- __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_device);
-}
-
-__device__
-inline
-static void __threadfence_block()
-{
- __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_work_group);
-}
-
-__device__
-inline
-static void __threadfence_system()
-{
- __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_all_svm_devices);
-}
-
-// abort
-__device__
-inline
-__attribute__((weak))
-void abort() {
- return __builtin_trap();
-}
-
-
-#endif // __HCC_OR_HIP_CLANG__
-
-#ifdef __HCC__
-
-/**
- * extern __shared__
- */
-
-// Macro to replace extern __shared__ declarations
-// to local variable definitions
-#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr();
-
-#define HIP_DYNAMIC_SHARED_ATTRIBUTE
-
-
-#elif defined(__clang__) && defined(__HIP__)
-
-// The noinline attribute helps encapsulate the printf expansion,
-// which otherwise has a performance impact just by increasing the
-// size of the calling function. Additionally, the weak attribute
-// allows the function to exist as a global although its definition is
-// included in every compilation unit.
-#if defined(_WIN32) || defined(_WIN64)
-extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
-void _wassert(const wchar_t *_msg, const wchar_t *_file, unsigned _line) {
- // FIXME: Need `wchar_t` support to generate assertion message.
- __builtin_trap();
-}
-#else /* defined(_WIN32) || defined(_WIN64) */
-extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
-void __assert_fail(const char * __assertion,
- const char *__file,
- unsigned int __line,
- const char *__function)
-{
- printf("%s:%u: %s: Device-side assertion `%s' failed.\n", __file, __line,
- __function, __assertion);
- __builtin_trap();
-}
-
-extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
-void __assertfail(const char * __assertion,
- const char *__file,
- unsigned int __line,
- const char *__function,
- size_t charsize)
-{
- // ignore all the args for now.
- __builtin_trap();
-}
-#endif /* defined(_WIN32) || defined(_WIN64) */
-
-__device__
-inline
-static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope)
-{
- if (flags) {
- __atomic_work_item_fence(flags, __memory_order_release, scope);
- __builtin_amdgcn_s_barrier();
- __atomic_work_item_fence(flags, __memory_order_acquire, scope);
- } else {
- __builtin_amdgcn_s_barrier();
- }
-}
-
-__device__
-inline
-static void __barrier(int n)
-{
- __work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group);
-}
-
-__device__
-inline
-__attribute__((convergent))
-void __syncthreads()
-{
- __barrier(__CLK_LOCAL_MEM_FENCE);
-}
-
-__device__
-inline
-__attribute__((convergent))
-int __syncthreads_count(int predicate)
-{
- return __ockl_wgred_add_i32(!!predicate);
-}
-
-__device__
-inline
-__attribute__((convergent))
-int __syncthreads_and(int predicate)
-{
- return __ockl_wgred_and_i32(!!predicate);
-}
-
-__device__
-inline
-__attribute__((convergent))
-int __syncthreads_or(int predicate)
-{
- return __ockl_wgred_or_i32(!!predicate);
-}
-
-// hip.amdgcn.bc - device routine
-/*
- HW_ID Register bit structure
- WAVE_ID 3:0 Wave buffer slot number. 0-9.
- SIMD_ID 5:4 SIMD which the wave is assigned to within the CU.
- PIPE_ID 7:6 Pipeline from which the wave was dispatched.
- CU_ID 11:8 Compute Unit the wave is assigned to.
- SH_ID 12 Shader Array (within an SE) the wave is assigned to.
- SE_ID 14:13 Shader Engine the wave is assigned to.
- TG_ID 19:16 Thread-group ID
- VM_ID 23:20 Virtual Memory ID
- QUEUE_ID 26:24 Queue from which this wave was dispatched.
- STATE_ID 29:27 State ID (graphics only, not compute).
- ME_ID 31:30 Micro-engine ID.
- */
-
-#define HW_ID 4
-
-#define HW_ID_CU_ID_SIZE 4
-#define HW_ID_CU_ID_OFFSET 8
-
-#define HW_ID_SE_ID_SIZE 2
-#define HW_ID_SE_ID_OFFSET 13
-
-/*
- Encoding of parameter bitmask
- HW_ID 5:0 HW_ID
- OFFSET 10:6 Range: 0..31
- SIZE 15:11 Range: 1..32
- */
-
-#define GETREG_IMMED(SZ,OFF,REG) (((SZ) << 11) | ((OFF) << 6) | (REG))
-
-/*
- __smid returns the wave's assigned Compute Unit and Shader Engine.
- The Compute Unit, CU_ID returned in bits 3:0, and Shader Engine, SE_ID in bits 5:4.
- Note: the results vary over time.
- SZ minus 1 since SIZE is 1-based.
-*/
-__device__
-inline
-unsigned __smid(void)
-{
- unsigned cu_id = __builtin_amdgcn_s_getreg(
- GETREG_IMMED(HW_ID_CU_ID_SIZE-1, HW_ID_CU_ID_OFFSET, HW_ID));
- unsigned se_id = __builtin_amdgcn_s_getreg(
- GETREG_IMMED(HW_ID_SE_ID_SIZE-1, HW_ID_SE_ID_OFFSET, HW_ID));
-
- /* Each shader engine has 16 CU */
- return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
-}
-
-// Macro to replace extern __shared__ declarations
-// to local variable definitions
-#define HIP_DYNAMIC_SHARED(type, var) \
- type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
-
-#define HIP_DYNAMIC_SHARED_ATTRIBUTE
-
-
-#endif //defined(__clang__) && defined(__HIP__)
-
-
-// loop unrolling
-static inline __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size) {
- auto dstPtr = static_cast<unsigned char*>(dst);
- auto srcPtr = static_cast<const unsigned char*>(src);
-
- while (size >= 4u) {
- dstPtr[0] = srcPtr[0];
- dstPtr[1] = srcPtr[1];
- dstPtr[2] = srcPtr[2];
- dstPtr[3] = srcPtr[3];
-
- size -= 4u;
- srcPtr += 4u;
- dstPtr += 4u;
- }
- switch (size) {
- case 3:
- dstPtr[2] = srcPtr[2];
- case 2:
- dstPtr[1] = srcPtr[1];
- case 1:
- dstPtr[0] = srcPtr[0];
- }
-
- return dst;
-}
-
-static inline __device__ void* __hip_hc_memset(void* dst, unsigned char val, size_t size) {
- auto dstPtr = static_cast<unsigned char*>(dst);
-
- while (size >= 4u) {
- dstPtr[0] = val;
- dstPtr[1] = val;
- dstPtr[2] = val;
- dstPtr[3] = val;
-
- size -= 4u;
- dstPtr += 4u;
- }
- switch (size) {
- case 3:
- dstPtr[2] = val;
- case 2:
- dstPtr[1] = val;
- case 1:
- dstPtr[0] = val;
- }
-
- return dst;
-}
-#ifndef __OPENMP_AMDGCN__
-static inline __device__ void* memcpy(void* dst, const void* src, size_t size) {
- return __hip_hc_memcpy(dst, src, size);
-}
-
-static inline __device__ void* memset(void* ptr, int val, size_t size) {
- unsigned char val8 = static_cast<unsigned char>(val);
- return __hip_hc_memset(ptr, val8, size);
-}
-#endif // !__OPENMP_AMDGCN__
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/device_library_decls.h b/third_party/rocm/include/hip/hcc_detail/device_library_decls.h
deleted file mode 100644
index 90aef16..0000000
--- a/third_party/rocm/include/hip/hcc_detail/device_library_decls.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/device_library_decls.h
- * @brief Contains declarations for types and functions in device library.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H
-
-#include "hip/hcc_detail/host_defines.h"
-
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-typedef unsigned int uint;
-typedef unsigned long ulong;
-typedef unsigned long long ullong;
-
-extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int);
-extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int);
-extern "C" __device__ uint __ockl_activelane_u32(void);
-
-extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint);
-extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int);
-extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint);
-extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int);
-extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint);
-
-extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar);
-extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort);
-extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint);
-extern "C" __device__ __attribute__((const)) ullong __ockl_clz_u64(ullong);
-
-extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float);
-extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float);
-extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float);
-extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float);
-
-extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float);
-extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float);
-
-extern "C" __device__ __attribute__((convergent)) void __ockl_gws_init(uint nwm1, uint rid);
-extern "C" __device__ __attribute__((convergent)) void __ockl_gws_barrier(uint nwm1, uint rid);
-
-extern "C" __device__ __attribute__((const)) uint32_t __ockl_lane_u32();
-extern "C" __device__ __attribute__((const)) int __ockl_grid_is_valid(void);
-extern "C" __device__ __attribute__((convergent)) void __ockl_grid_sync(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_num_grids(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_grid_rank(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_size(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank(void);
-extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void);
-extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void);
-
-extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float);
-
-extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_add_i32(int a);
-extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_and_i32(int a);
-extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_or_i32(int a);
-
-
-// Introduce local address space
-#define __local __attribute__((address_space(3)))
-
-#ifdef __HIP_DEVICE_COMPILE__
-__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
-#endif //__HIP_DEVICE_COMPILE__
-
-#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
-// __llvm_fence* functions from device-libs/irif/src/fence.ll
-extern "C" __device__ void __llvm_fence_acq_sg(void);
-extern "C" __device__ void __llvm_fence_acq_wg(void);
-extern "C" __device__ void __llvm_fence_acq_dev(void);
-extern "C" __device__ void __llvm_fence_acq_sys(void);
-
-extern "C" __device__ void __llvm_fence_rel_sg(void);
-extern "C" __device__ void __llvm_fence_rel_wg(void);
-extern "C" __device__ void __llvm_fence_rel_dev(void);
-extern "C" __device__ void __llvm_fence_rel_sys(void);
-
-extern "C" __device__ void __llvm_fence_ar_sg(void);
-extern "C" __device__ void __llvm_fence_ar_wg(void);
-extern "C" __device__ void __llvm_fence_ar_dev(void);
-extern "C" __device__ void __llvm_fence_ar_sys(void);
-
-
-extern "C" __device__ void __llvm_fence_sc_sg(void);
-extern "C" __device__ void __llvm_fence_sc_wg(void);
-extern "C" __device__ void __llvm_fence_sc_dev(void);
-extern "C" __device__ void __llvm_fence_sc_sys(void);
-#else
-// Using hip.amdgcn.bc - sync threads
-#define __CLK_LOCAL_MEM_FENCE 0x01
-typedef unsigned __cl_mem_fence_flags;
-
-typedef enum __memory_scope {
- __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
- __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
- __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
- __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
- __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
-} __memory_scope;
-
-// enum values aligned with what clang uses in EmitAtomicExpr()
-typedef enum __memory_order
-{
- __memory_order_relaxed = __ATOMIC_RELAXED,
- __memory_order_acquire = __ATOMIC_ACQUIRE,
- __memory_order_release = __ATOMIC_RELEASE,
- __memory_order_acq_rel = __ATOMIC_ACQ_REL,
- __memory_order_seq_cst = __ATOMIC_SEQ_CST
-} __memory_order;
-
-// Linked from hip.amdgcn.bc
-extern "C" __device__ void
-__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope);
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/driver_types.h b/third_party/rocm/include/hip/hcc_detail/driver_types.h
deleted file mode 100644
index 7db78e5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/driver_types.h
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H
-
-#ifndef __cplusplus
-#include <stdbool.h>
-#endif
-
-typedef void* hipDeviceptr_t;
-typedef enum hipChannelFormatKind {
- hipChannelFormatKindSigned = 0,
- hipChannelFormatKindUnsigned = 1,
- hipChannelFormatKindFloat = 2,
- hipChannelFormatKindNone = 3
-}hipChannelFormatKind;
-
-typedef struct hipChannelFormatDesc {
- int x;
- int y;
- int z;
- int w;
- enum hipChannelFormatKind f;
-}hipChannelFormatDesc;
-
-#define HIP_TRSA_OVERRIDE_FORMAT 0x01
-#define HIP_TRSF_READ_AS_INTEGER 0x01
-#define HIP_TRSF_NORMALIZED_COORDINATES 0x02
-#define HIP_TRSF_SRGB 0x10
-
-typedef enum hipArray_Format {
- HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01,
- HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
- HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
- HIP_AD_FORMAT_SIGNED_INT8 = 0x08,
- HIP_AD_FORMAT_SIGNED_INT16 = 0x09,
- HIP_AD_FORMAT_SIGNED_INT32 = 0x0a,
- HIP_AD_FORMAT_HALF = 0x10,
- HIP_AD_FORMAT_FLOAT = 0x20
-}hipArray_Format;
-
-typedef struct HIP_ARRAY_DESCRIPTOR {
- size_t Width;
- size_t Height;
- enum hipArray_Format Format;
- unsigned int NumChannels;
-}HIP_ARRAY_DESCRIPTOR;
-
-typedef struct HIP_ARRAY3D_DESCRIPTOR {
- size_t Width;
- size_t Height;
- size_t Depth;
- enum hipArray_Format Format;
- unsigned int NumChannels;
- unsigned int Flags;
-}HIP_ARRAY3D_DESCRIPTOR;
-
-typedef struct hipArray {
- void* data; // FIXME: generalize this
- struct hipChannelFormatDesc desc;
- unsigned int type;
- unsigned int width;
- unsigned int height;
- unsigned int depth;
- enum hipArray_Format Format;
- unsigned int NumChannels;
- bool isDrv;
- unsigned int textureType;
-}hipArray;
-
-typedef struct hip_Memcpy2D {
- size_t srcXInBytes;
- size_t srcY;
- hipMemoryType srcMemoryType;
- const void* srcHost;
- hipDeviceptr_t srcDevice;
- hipArray* srcArray;
- size_t srcPitch;
- size_t dstXInBytes;
- size_t dstY;
- hipMemoryType dstMemoryType;
- void* dstHost;
- hipDeviceptr_t dstDevice;
- hipArray* dstArray;
- size_t dstPitch;
- size_t WidthInBytes;
- size_t Height;
-} hip_Memcpy2D;
-
-
-typedef struct hipArray* hipArray_t;
-typedef hipArray_t hiparray;
-typedef const struct hipArray* hipArray_const_t;
-
-// TODO: It needs to be modified since it was just copied from hipArray.
-struct hipMipmappedArray {
- void* data; // FIXME: generalize this
- struct hipChannelFormatDesc desc;
- unsigned int width;
- unsigned int height;
- unsigned int depth;
-};
-
-typedef struct hipMipmappedArray* hipMipmappedArray_t;
-
-typedef const struct hipMipmappedArray* hipMipmappedArray_const_t;
-
-/**
- * hip resource types
- */
-typedef enum hipResourceType {
- hipResourceTypeArray = 0x00,
- hipResourceTypeMipmappedArray = 0x01,
- hipResourceTypeLinear = 0x02,
- hipResourceTypePitch2D = 0x03
-}hipResourceType;
-
-typedef enum HIPresourcetype_enum {
- HIP_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resoure */
- HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */
- HIP_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */
- HIP_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */
-} HIPresourcetype;
-
-/**
- * hip address modes
- */
-typedef enum HIPaddress_mode_enum {
- HIP_TR_ADDRESS_MODE_WRAP = 0,
- HIP_TR_ADDRESS_MODE_CLAMP = 1,
- HIP_TR_ADDRESS_MODE_MIRROR = 2,
- HIP_TR_ADDRESS_MODE_BORDER = 3
-} HIPaddress_mode;
-
-/**
- * hip filter modes
- */
-typedef enum HIPfilter_mode_enum {
- HIP_TR_FILTER_MODE_POINT = 0,
- HIP_TR_FILTER_MODE_LINEAR = 1
-} HIPfilter_mode;
-
-/**
- * Texture descriptor
- */
-typedef struct HIP_TEXTURE_DESC_st {
- HIPaddress_mode addressMode[3]; /**< Address modes */
- HIPfilter_mode filterMode; /**< Filter mode */
- unsigned int flags; /**< Flags */
- unsigned int maxAnisotropy; /**< Maximum anisotropy ratio */
- HIPfilter_mode mipmapFilterMode; /**< Mipmap filter mode */
- float mipmapLevelBias; /**< Mipmap level bias */
- float minMipmapLevelClamp; /**< Mipmap minimum level clamp */
- float maxMipmapLevelClamp; /**< Mipmap maximum level clamp */
- float borderColor[4]; /**< Border Color */
- int reserved[12];
-} HIP_TEXTURE_DESC;
-
-/**
- * hip texture resource view formats
- */
-typedef enum hipResourceViewFormat {
- hipResViewFormatNone = 0x00,
- hipResViewFormatUnsignedChar1 = 0x01,
- hipResViewFormatUnsignedChar2 = 0x02,
- hipResViewFormatUnsignedChar4 = 0x03,
- hipResViewFormatSignedChar1 = 0x04,
- hipResViewFormatSignedChar2 = 0x05,
- hipResViewFormatSignedChar4 = 0x06,
- hipResViewFormatUnsignedShort1 = 0x07,
- hipResViewFormatUnsignedShort2 = 0x08,
- hipResViewFormatUnsignedShort4 = 0x09,
- hipResViewFormatSignedShort1 = 0x0a,
- hipResViewFormatSignedShort2 = 0x0b,
- hipResViewFormatSignedShort4 = 0x0c,
- hipResViewFormatUnsignedInt1 = 0x0d,
- hipResViewFormatUnsignedInt2 = 0x0e,
- hipResViewFormatUnsignedInt4 = 0x0f,
- hipResViewFormatSignedInt1 = 0x10,
- hipResViewFormatSignedInt2 = 0x11,
- hipResViewFormatSignedInt4 = 0x12,
- hipResViewFormatHalf1 = 0x13,
- hipResViewFormatHalf2 = 0x14,
- hipResViewFormatHalf4 = 0x15,
- hipResViewFormatFloat1 = 0x16,
- hipResViewFormatFloat2 = 0x17,
- hipResViewFormatFloat4 = 0x18,
- hipResViewFormatUnsignedBlockCompressed1 = 0x19,
- hipResViewFormatUnsignedBlockCompressed2 = 0x1a,
- hipResViewFormatUnsignedBlockCompressed3 = 0x1b,
- hipResViewFormatUnsignedBlockCompressed4 = 0x1c,
- hipResViewFormatSignedBlockCompressed4 = 0x1d,
- hipResViewFormatUnsignedBlockCompressed5 = 0x1e,
- hipResViewFormatSignedBlockCompressed5 = 0x1f,
- hipResViewFormatUnsignedBlockCompressed6H = 0x20,
- hipResViewFormatSignedBlockCompressed6H = 0x21,
- hipResViewFormatUnsignedBlockCompressed7 = 0x22
-}hipResourceViewFormat;
-
-typedef enum HIPresourceViewFormat_enum
-{
- HIP_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
- HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
- HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, /**< 1 channel 32-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, /**< 2 channel 32-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, /**< 4 channel 32-bit floating point */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, /**< Block compressed 1 */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, /**< Block compressed 2 */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, /**< Block compressed 3 */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, /**< Block compressed 4 unsigned */
- HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, /**< Block compressed 4 signed */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, /**< Block compressed 5 unsigned */
- HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, /**< Block compressed 5 signed */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */
- HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, /**< Block compressed 6 signed half-float */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 /**< Block compressed 7 */
-} HIPresourceViewFormat;
-
-/**
- * HIP resource descriptor
- */
-typedef struct hipResourceDesc {
- enum hipResourceType resType;
-
- union {
- struct {
- hipArray_t array;
- } array;
- struct {
- hipMipmappedArray_t mipmap;
- } mipmap;
- struct {
- void* devPtr;
- struct hipChannelFormatDesc desc;
- size_t sizeInBytes;
- } linear;
- struct {
- void* devPtr;
- struct hipChannelFormatDesc desc;
- size_t width;
- size_t height;
- size_t pitchInBytes;
- } pitch2D;
- } res;
-}hipResourceDesc;
-
-typedef struct HIP_RESOURCE_DESC_st
-{
- HIPresourcetype resType; /**< Resource type */
-
- union {
- struct {
- hipArray_t hArray; /**< HIP array */
- } array;
- struct {
- hipMipmappedArray_t hMipmappedArray; /**< HIP mipmapped array */
- } mipmap;
- struct {
- hipDeviceptr_t devPtr; /**< Device pointer */
- hipArray_Format format; /**< Array format */
- unsigned int numChannels; /**< Channels per array element */
- size_t sizeInBytes; /**< Size in bytes */
- } linear;
- struct {
- hipDeviceptr_t devPtr; /**< Device pointer */
- hipArray_Format format; /**< Array format */
- unsigned int numChannels; /**< Channels per array element */
- size_t width; /**< Width of the array in elements */
- size_t height; /**< Height of the array in elements */
- size_t pitchInBytes; /**< Pitch between two rows in bytes */
- } pitch2D;
- struct {
- int reserved[32];
- } reserved;
- } res;
-
- unsigned int flags; /**< Flags (must be zero) */
-} HIP_RESOURCE_DESC;
-
-/**
- * hip resource view descriptor
- */
-struct hipResourceViewDesc {
- enum hipResourceViewFormat format;
- size_t width;
- size_t height;
- size_t depth;
- unsigned int firstMipmapLevel;
- unsigned int lastMipmapLevel;
- unsigned int firstLayer;
- unsigned int lastLayer;
-};
-
-/**
- * Resource view descriptor
- */
-typedef struct HIP_RESOURCE_VIEW_DESC_st
-{
- HIPresourceViewFormat format; /**< Resource view format */
- size_t width; /**< Width of the resource view */
- size_t height; /**< Height of the resource view */
- size_t depth; /**< Depth of the resource view */
- unsigned int firstMipmapLevel; /**< First defined mipmap level */
- unsigned int lastMipmapLevel; /**< Last defined mipmap level */
- unsigned int firstLayer; /**< First layer index */
- unsigned int lastLayer; /**< Last layer index */
- unsigned int reserved[16];
-} HIP_RESOURCE_VIEW_DESC;
-
-/**
- * Memory copy types
- *
- */
-typedef enum hipMemcpyKind {
- hipMemcpyHostToHost = 0, ///< Host-to-Host Copy
- hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy
- hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy
- hipMemcpyDeviceToDevice = 3, ///< Device-to-Device Copy
- hipMemcpyDefault =
- 4 ///< Runtime will automatically determine copy-kind based on virtual addresses.
-} hipMemcpyKind;
-
-typedef struct hipPitchedPtr {
- void* ptr;
- size_t pitch;
- size_t xsize;
- size_t ysize;
-}hipPitchedPtr;
-
-typedef struct hipExtent {
- size_t width; // Width in elements when referring to array memory, in bytes when referring to
- // linear memory
- size_t height;
- size_t depth;
-}hipExtent;
-
-typedef struct hipPos {
- size_t x;
- size_t y;
- size_t z;
-}hipPos;
-
-typedef struct hipMemcpy3DParms {
- hipArray_t srcArray;
- struct hipPos srcPos;
- struct hipPitchedPtr srcPtr;
- hipArray_t dstArray;
- struct hipPos dstPos;
- struct hipPitchedPtr dstPtr;
- struct hipExtent extent;
- enum hipMemcpyKind kind;
-} hipMemcpy3DParms;
-
-typedef struct HIP_MEMCPY3D {
- unsigned int srcXInBytes;
- unsigned int srcY;
- unsigned int srcZ;
- unsigned int srcLOD;
- hipMemoryType srcMemoryType;
- const void* srcHost;
- hipDeviceptr_t srcDevice;
- hipArray_t srcArray;
- unsigned int srcPitch;
- unsigned int srcHeight;
- unsigned int dstXInBytes;
- unsigned int dstY;
- unsigned int dstZ;
- unsigned int dstLOD;
- hipMemoryType dstMemoryType;
- void* dstHost;
- hipDeviceptr_t dstDevice;
- hipArray_t dstArray;
- unsigned int dstPitch;
- unsigned int dstHeight;
- unsigned int WidthInBytes;
- unsigned int Height;
- unsigned int Depth;
-} HIP_MEMCPY3D;
-
-static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz,
- size_t ysz) {
- struct hipPitchedPtr s;
-
- s.ptr = d;
- s.pitch = p;
- s.xsize = xsz;
- s.ysize = ysz;
-
- return s;
-}
-
-static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) {
- struct hipPos p;
-
- p.x = x;
- p.y = y;
- p.z = z;
-
- return p;
-}
-
-static inline struct hipExtent make_hipExtent(size_t w, size_t h, size_t d) {
- struct hipExtent e;
-
- e.width = w;
- e.height = h;
- e.depth = d;
-
- return e;
-}
-
-typedef enum hipFunction_attribute {
- HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
- HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_NUM_REGS,
- HIP_FUNC_ATTRIBUTE_PTX_VERSION,
- HIP_FUNC_ATTRIBUTE_BINARY_VERSION,
- HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA,
- HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT,
- HIP_FUNC_ATTRIBUTE_MAX
-}hipFunction_attribute;
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elf_types.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elf_types.hpp
deleted file mode 100644
index a17b700..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elf_types.hpp
+++ /dev/null
@@ -1,748 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFTYPES_H
-#define ELFTYPES_H
-
-#ifndef ELFIO_NO_OWN_TYPES
-#if !defined(ELFIO_NO_CSTDINT) && !defined(ELFIO_NO_INTTYPES)
-#include <stdint.h>
-#else
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef unsigned short uint16_t;
-typedef signed short int16_t;
-#ifdef _MSC_VER
-typedef unsigned __int32 uint32_t;
-typedef signed __int32 int32_t;
-typedef unsigned __int64 uint64_t;
-typedef signed __int64 int64_t;
-#else
-typedef unsigned int uint32_t;
-typedef signed int int32_t;
-typedef unsigned long long uint64_t;
-typedef signed long long int64_t;
-#endif // _MSC_VER
-#endif // ELFIO_NO_CSTDINT
-#endif // ELFIO_NO_OWN_TYPES
-
-namespace ELFIO {
-
-// Attention! Platform depended definitions.
-typedef uint16_t Elf_Half;
-typedef uint32_t Elf_Word;
-typedef int32_t Elf_Sword;
-typedef uint64_t Elf_Xword;
-typedef int64_t Elf_Sxword;
-
-typedef uint32_t Elf32_Addr;
-typedef uint32_t Elf32_Off;
-typedef uint64_t Elf64_Addr;
-typedef uint64_t Elf64_Off;
-
-#define Elf32_Half Elf_Half
-#define Elf64_Half Elf_Half
-#define Elf32_Word Elf_Word
-#define Elf64_Word Elf_Word
-#define Elf32_Sword Elf_Sword
-#define Elf64_Sword Elf_Sword
-
-///////////////////////
-// ELF Header Constants
-
-// File type
-#define ET_NONE 0
-#define ET_REL 1
-#define ET_EXEC 2
-#define ET_DYN 3
-#define ET_CORE 4
-#define ET_LOOS 0xFE00
-#define ET_HIOS 0xFEFF
-#define ET_LOPROC 0xFF00
-#define ET_HIPROC 0xFFFF
-
-
-#define EM_NONE 0 // No machine
-#define EM_M32 1 // AT&T WE 32100
-#define EM_SPARC 2 // SUN SPARC
-#define EM_386 3 // Intel 80386
-#define EM_68K 4 // Motorola m68k family
-#define EM_88K 5 // Motorola m88k family
-#define EM_486 6 // Intel 80486// Reserved for future use
-#define EM_860 7 // Intel 80860
-#define EM_MIPS 8 // MIPS R3000 (officially, big-endian only)
-#define EM_S370 9 // IBM System/370
-#define EM_MIPS_RS3_LE 10 // MIPS R3000 little-endian (Oct 4 1999 Draft) Deprecated
-#define EM_res011 11 // Reserved
-#define EM_res012 12 // Reserved
-#define EM_res013 13 // Reserved
-#define EM_res014 14 // Reserved
-#define EM_PARISC 15 // HPPA
-#define EM_res016 16 // Reserved
-#define EM_VPP550 17 // Fujitsu VPP500
-#define EM_SPARC32PLUS 18 // Sun's "v8plus"
-#define EM_960 19 // Intel 80960
-#define EM_PPC 20 // PowerPC
-#define EM_PPC64 21 // 64-bit PowerPC
-#define EM_S390 22 // IBM S/390
-#define EM_SPU 23 // Sony/Toshiba/IBM SPU
-#define EM_res024 24 // Reserved
-#define EM_res025 25 // Reserved
-#define EM_res026 26 // Reserved
-#define EM_res027 27 // Reserved
-#define EM_res028 28 // Reserved
-#define EM_res029 29 // Reserved
-#define EM_res030 30 // Reserved
-#define EM_res031 31 // Reserved
-#define EM_res032 32 // Reserved
-#define EM_res033 33 // Reserved
-#define EM_res034 34 // Reserved
-#define EM_res035 35 // Reserved
-#define EM_V800 36 // NEC V800 series
-#define EM_FR20 37 // Fujitsu FR20
-#define EM_RH32 38 // TRW RH32
-#define EM_MCORE 39 // Motorola M*Core // May also be taken by Fujitsu MMA
-#define EM_RCE 39 // Old name for MCore
-#define EM_ARM 40 // ARM
-#define EM_OLD_ALPHA 41 // Digital Alpha
-#define EM_SH 42 // Renesas (formerly Hitachi) / SuperH SH
-#define EM_SPARCV9 43 // SPARC v9 64-bit
-#define EM_TRICORE 44 // Siemens Tricore embedded processor
-#define EM_ARC 45 // ARC Cores
-#define EM_H8_300 46 // Renesas (formerly Hitachi) H8/300
-#define EM_H8_300H 47 // Renesas (formerly Hitachi) H8/300H
-#define EM_H8S 48 // Renesas (formerly Hitachi) H8S
-#define EM_H8_500 49 // Renesas (formerly Hitachi) H8/500
-#define EM_IA_64 50 // Intel IA-64 Processor
-#define EM_MIPS_X 51 // Stanford MIPS-X
-#define EM_COLDFIRE 52 // Motorola Coldfire
-#define EM_68HC12 53 // Motorola M68HC12
-#define EM_MMA 54 // Fujitsu Multimedia Accelerator
-#define EM_PCP 55 // Siemens PCP
-#define EM_NCPU 56 // Sony nCPU embedded RISC processor
-#define EM_NDR1 57 // Denso NDR1 microprocesspr
-#define EM_STARCORE 58 // Motorola Star*Core processor
-#define EM_ME16 59 // Toyota ME16 processor
-#define EM_ST100 60 // STMicroelectronics ST100 processor
-#define EM_TINYJ 61 // Advanced Logic Corp. TinyJ embedded processor
-#define EM_X86_64 62 // Advanced Micro Devices X86-64 processor
-#define EM_PDSP 63 // Sony DSP Processor
-#define EM_PDP10 64 // Digital Equipment Corp. PDP-10
-#define EM_PDP11 65 // Digital Equipment Corp. PDP-11
-#define EM_FX66 66 // Siemens FX66 microcontroller
-#define EM_ST9PLUS 67 // STMicroelectronics ST9+ 8/16 bit microcontroller
-#define EM_ST7 68 // STMicroelectronics ST7 8-bit microcontroller
-#define EM_68HC16 69 // Motorola MC68HC16 Microcontroller
-#define EM_68HC11 70 // Motorola MC68HC11 Microcontroller
-#define EM_68HC08 71 // Motorola MC68HC08 Microcontroller
-#define EM_68HC05 72 // Motorola MC68HC05 Microcontroller
-#define EM_SVX 73 // Silicon Graphics SVx
-#define EM_ST19 74 // STMicroelectronics ST19 8-bit cpu
-#define EM_VAX 75 // Digital VAX
-#define EM_CRIS 76 // Axis Communications 32-bit embedded processor
-#define EM_JAVELIN 77 // Infineon Technologies 32-bit embedded cpu
-#define EM_FIREPATH 78 // Element 14 64-bit DSP processor
-#define EM_ZSP 79 // LSI Logic's 16-bit DSP processor
-#define EM_MMIX 80 // Donald Knuth's educational 64-bit processor
-#define EM_HUANY 81 // Harvard's machine-independent format
-#define EM_PRISM 82 // SiTera Prism
-#define EM_AVR 83 // Atmel AVR 8-bit microcontroller
-#define EM_FR30 84 // Fujitsu FR30
-#define EM_D10V 85 // Mitsubishi D10V
-#define EM_D30V 86 // Mitsubishi D30V
-#define EM_V850 87 // NEC v850
-#define EM_M32R 88 // Renesas M32R (formerly Mitsubishi M32R)
-#define EM_MN10300 89 // Matsushita MN10300
-#define EM_MN10200 90 // Matsushita MN10200
-#define EM_PJ 91 // picoJava
-#define EM_OPENRISC 92 // OpenRISC 32-bit embedded processor
-#define EM_ARC_A5 93 // ARC Cores Tangent-A5
-#define EM_XTENSA 94 // Tensilica Xtensa Architecture
-#define EM_VIDEOCORE 95 // Alphamosaic VideoCore processor
-#define EM_TMM_GPP 96 // Thompson Multimedia General Purpose Processor
-#define EM_NS32K 97 // National Semiconductor 32000 series
-#define EM_TPC 98 // Tenor Network TPC processor
-#define EM_SNP1K 99 // Trebia SNP 1000 processor
-#define EM_ST200 100 // STMicroelectronics ST200 microcontroller
-#define EM_IP2K 101 // Ubicom IP2022 micro controller
-#define EM_MAX 102 // MAX Processor
-#define EM_CR 103 // National Semiconductor CompactRISC
-#define EM_F2MC16 104 // Fujitsu F2MC16
-#define EM_MSP430 105 // TI msp430 micro controller
-#define EM_BLACKFIN 106 // ADI Blackfin
-#define EM_SE_C33 107 // S1C33 Family of Seiko Epson processors
-#define EM_SEP 108 // Sharp embedded microprocessor
-#define EM_ARCA 109 // Arca RISC Microprocessor
-#define EM_UNICORE 110 // Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University
-#define EM_EXCESS 111 // eXcess: 16/32/64-bit configurable embedded CPU
-#define EM_DXP 112 // Icera Semiconductor Inc. Deep Execution Processor
-#define EM_ALTERA_NIOS2 113 // Altera Nios II soft-core processor
-#define EM_CRX 114 // National Semiconductor CRX
-#define EM_XGATE 115 // Motorola XGATE embedded processor
-#define EM_C166 116 // Infineon C16x/XC16x processor
-#define EM_M16C 117 // Renesas M16C series microprocessors
-#define EM_DSPIC30F 118 // Microchip Technology dsPIC30F Digital Signal Controller
-#define EM_CE 119 // Freescale Communication Engine RISC core
-#define EM_M32C 120 // Renesas M32C series microprocessors
-#define EM_res121 121 // Reserved
-#define EM_res122 122 // Reserved
-#define EM_res123 123 // Reserved
-#define EM_res124 124 // Reserved
-#define EM_res125 125 // Reserved
-#define EM_res126 126 // Reserved
-#define EM_res127 127 // Reserved
-#define EM_res128 128 // Reserved
-#define EM_res129 129 // Reserved
-#define EM_res130 130 // Reserved
-#define EM_TSK3000 131 // Altium TSK3000 core
-#define EM_RS08 132 // Freescale RS08 embedded processor
-#define EM_res133 133 // Reserved
-#define EM_ECOG2 134 // Cyan Technology eCOG2 microprocessor
-#define EM_SCORE 135 // Sunplus Score
-#define EM_SCORE7 135 // Sunplus S+core7 RISC processor
-#define EM_DSP24 136 // New Japan Radio (NJR) 24-bit DSP Processor
-#define EM_VIDEOCORE3 137 // Broadcom VideoCore III processor
-#define EM_LATTICEMICO32 138 // RISC processor for Lattice FPGA architecture
-#define EM_SE_C17 139 // Seiko Epson C17 family
-#define EM_TI_C6000 140 // Texas Instruments TMS320C6000 DSP family
-#define EM_TI_C2000 141 // Texas Instruments TMS320C2000 DSP family
-#define EM_TI_C5500 142 // Texas Instruments TMS320C55x DSP family
-#define EM_res143 143 // Reserved
-#define EM_res144 144 // Reserved
-#define EM_res145 145 // Reserved
-#define EM_res146 146 // Reserved
-#define EM_res147 147 // Reserved
-#define EM_res148 148 // Reserved
-#define EM_res149 149 // Reserved
-#define EM_res150 150 // Reserved
-#define EM_res151 151 // Reserved
-#define EM_res152 152 // Reserved
-#define EM_res153 153 // Reserved
-#define EM_res154 154 // Reserved
-#define EM_res155 155 // Reserved
-#define EM_res156 156 // Reserved
-#define EM_res157 157 // Reserved
-#define EM_res158 158 // Reserved
-#define EM_res159 159 // Reserved
-#define EM_MMDSP_PLUS 160 // STMicroelectronics 64bit VLIW Data Signal Processor
-#define EM_CYPRESS_M8C 161 // Cypress M8C microprocessor
-#define EM_R32C 162 // Renesas R32C series microprocessors
-#define EM_TRIMEDIA 163 // NXP Semiconductors TriMedia architecture family
-#define EM_QDSP6 164 // QUALCOMM DSP6 Processor
-#define EM_8051 165 // Intel 8051 and variants
-#define EM_STXP7X 166 // STMicroelectronics STxP7x family
-#define EM_NDS32 167 // Andes Technology compact code size embedded RISC processor family
-#define EM_ECOG1 168 // Cyan Technology eCOG1X family
-#define EM_ECOG1X 168 // Cyan Technology eCOG1X family
-#define EM_MAXQ30 169 // Dallas Semiconductor MAXQ30 Core Micro-controllers
-#define EM_XIMO16 170 // New Japan Radio (NJR) 16-bit DSP Processor
-#define EM_MANIK 171 // M2000 Reconfigurable RISC Microprocessor
-#define EM_CRAYNV2 172 // Cray Inc. NV2 vector architecture
-#define EM_RX 173 // Renesas RX family
-#define EM_METAG 174 // Imagination Technologies META processor architecture
-#define EM_MCST_ELBRUS 175 // MCST Elbrus general purpose hardware architecture
-#define EM_ECOG16 176 // Cyan Technology eCOG16 family
-#define EM_CR16 177 // National Semiconductor CompactRISC 16-bit processor
-#define EM_ETPU 178 // Freescale Extended Time Processing Unit
-#define EM_SLE9X 179 // Infineon Technologies SLE9X core
-#define EM_L1OM 180 // Intel L1OM
-#define EM_INTEL181 181 // Reserved by Intel
-#define EM_INTEL182 182 // Reserved by Intel
-#define EM_res183 183 // Reserved by ARM
-#define EM_res184 184 // Reserved by ARM
-#define EM_AVR32 185 // Atmel Corporation 32-bit microprocessor family
-#define EM_STM8 186 // STMicroeletronics STM8 8-bit microcontroller
-#define EM_TILE64 187 // Tilera TILE64 multicore architecture family
-#define EM_TILEPRO 188 // Tilera TILEPro multicore architecture family
-#define EM_MICROBLAZE 189 // Xilinx MicroBlaze 32-bit RISC soft processor core
-#define EM_CUDA 190 // NVIDIA CUDA architecture
-#define EM_TILEGX 191 // Tilera TILE-Gx multicore architecture family
-#define EM_CLOUDSHIELD 192 // CloudShield architecture family
-#define EM_COREA_1ST 193 // KIPO-KAIST Core-A 1st generation processor family
-#define EM_COREA_2ND 194 // KIPO-KAIST Core-A 2nd generation processor family
-#define EM_ARC_COMPACT2 195 // Synopsys ARCompact V2
-#define EM_OPEN8 196 // Open8 8-bit RISC soft processor core
-#define EM_RL78 197 // Renesas RL78 family
-#define EM_VIDEOCORE5 198 // Broadcom VideoCore V processor
-#define EM_78KOR 199 // Renesas 78KOR family
-#define EM_56800EX 200 // Freescale 56800EX Digital Signal Controller (DSC)
-#define EM_BA1 201 // Beyond BA1 CPU architecture
-#define EM_BA2 202 // Beyond BA2 CPU architecture
-#define EM_XCORE 203 // XMOS xCORE processor family
-#define EM_MCHP_PIC 204 // Microchip 8-bit PIC(r) family
-#define EM_INTEL205 205 // Reserved by Intel
-#define EM_INTEL206 206 // Reserved by Intel
-#define EM_INTEL207 207 // Reserved by Intel
-#define EM_INTEL208 208 // Reserved by Intel
-#define EM_INTEL209 209 // Reserved by Intel
-#define EM_KM32 210 // KM211 KM32 32-bit processor
-#define EM_KMX32 211 // KM211 KMX32 32-bit processor
-#define EM_KMX16 212 // KM211 KMX16 16-bit processor
-#define EM_KMX8 213 // KM211 KMX8 8-bit processor
-#define EM_KVARC 214 // KM211 KVARC processor
-#define EM_CDP 215 // Paneve CDP architecture family
-#define EM_COGE 216 // Cognitive Smart Memory Processor
-#define EM_COOL 217 // iCelero CoolEngine
-#define EM_NORC 218 // Nanoradio Optimized RISC
-#define EM_CSR_KALIMBA 219 // CSR Kalimba architecture family
-#define EM_Z80 220 // Zilog Z80
-#define EM_VISIUM 221 // Controls and Data Services VISIUMcore processor
-#define EM_FT32 222 // FTDI Chip FT32 high performance 32-bit RISC architecture
-#define EM_MOXIE 223 // Moxie processor family
-#define EM_AMDGPU 224 // AMD GPU architecture
-#define EM_RISCV 243 // RISC-V
-#define EM_LANAI 244 // Lanai processor
-#define EM_CEVA 245 // CEVA Processor Architecture Family
-#define EM_CEVA_X2 246 // CEVA X2 Processor Family
-#define EM_BPF 247 // Linux BPF – in-kernel virtual machine
-
-// File version
-#define EV_NONE 0
-#define EV_CURRENT 1
-
-// Identification index
-#define EI_MAG0 0
-#define EI_MAG1 1
-#define EI_MAG2 2
-#define EI_MAG3 3
-#define EI_CLASS 4
-#define EI_DATA 5
-#define EI_VERSION 6
-#define EI_OSABI 7
-#define EI_ABIVERSION 8
-#define EI_PAD 9
-#define EI_NIDENT 16
-
-// Magic number
-#define ELFMAG0 0x7F
-#define ELFMAG1 'E'
-#define ELFMAG2 'L'
-#define ELFMAG3 'F'
-
-// File class
-#define ELFCLASSNONE 0
-#define ELFCLASS32 1
-#define ELFCLASS64 2
-
-// Encoding
-#define ELFDATANONE 0
-#define ELFDATA2LSB 1
-#define ELFDATA2MSB 2
-
-// OS extensions
-#define ELFOSABI_NONE 0 // No extensions or unspecified
-#define ELFOSABI_HPUX 1 // Hewlett-Packard HP-UX
-#define ELFOSABI_NETBSD 2 // NetBSD
-#define ELFOSABI_LINUX 3 // Linux
-#define ELFOSABI_SOLARIS 6 // Sun Solaris
-#define ELFOSABI_AIX 7 // AIX
-#define ELFOSABI_IRIX 8 // IRIX
-#define ELFOSABI_FREEBSD 9 // FreeBSD
-#define ELFOSABI_TRU64 10 // Compaq TRU64 UNIX
-#define ELFOSABI_MODESTO 11 // Novell Modesto
-#define ELFOSABI_OPENBSD 12 // Open BSD
-#define ELFOSABI_OPENVMS 13 // Open VMS
-#define ELFOSABI_NSK 14 // Hewlett-Packard Non-Stop Kernel
-#define ELFOSABI_AROS 15 // Amiga Research OS
-#define ELFOSABI_FENIXOS 16 // The FenixOS highly scalable multi-core OS
-// 64-255 Architecture-specific value range
-
-
-/////////////////////
-// Sections constants
-
-// Section indexes
-#define SHN_UNDEF 0
-#define SHN_LORESERVE 0xFF00
-#define SHN_LOPROC 0xFF00
-#define SHN_HIPROC 0xFF1F
-#define SHN_LOOS 0xFF20
-#define SHN_HIOS 0xFF3F
-#define SHN_ABS 0xFFF1
-#define SHN_COMMON 0xFFF2
-#define SHN_XINDEX 0xFFFF
-#define SHN_HIRESERVE 0xFFFF
-
-// Section types
-#define SHT_NULL 0
-#define SHT_PROGBITS 1
-#define SHT_SYMTAB 2
-#define SHT_STRTAB 3
-#define SHT_RELA 4
-#define SHT_HASH 5
-#define SHT_DYNAMIC 6
-#define SHT_NOTE 7
-#define SHT_NOBITS 8
-#define SHT_REL 9
-#define SHT_SHLIB 10
-#define SHT_DYNSYM 11
-#define SHT_INIT_ARRAY 14
-#define SHT_FINI_ARRAY 15
-#define SHT_PREINIT_ARRAY 16
-#define SHT_GROUP 17
-#define SHT_SYMTAB_SHNDX 18
-#define SHT_LOOS 0x60000000
-#define SHT_HIOS 0x6fffffff
-#define SHT_LOPROC 0x70000000
-#define SHT_HIPROC 0x7FFFFFFF
-#define SHT_LOUSER 0x80000000
-#define SHT_HIUSER 0xFFFFFFFF
-
-// Section attribute flags
-#define SHF_WRITE 0x1
-#define SHF_ALLOC 0x2
-#define SHF_EXECINSTR 0x4
-#define SHF_MERGE 0x10
-#define SHF_STRINGS 0x20
-#define SHF_INFO_LINK 0x40
-#define SHF_LINK_ORDER 0x80
-#define SHF_OS_NONCONFORMING 0x100
-#define SHF_GROUP 0x200
-#define SHF_TLS 0x400
-#define SHF_MASKOS 0x0ff00000
-#define SHF_MASKPROC 0xF0000000
-
-// Section group flags
-#define GRP_COMDAT 0x1
-#define GRP_MASKOS 0x0ff00000
-#define GRP_MASKPROC 0xf0000000
-
-// Symbol binding
-#define STB_LOCAL 0
-#define STB_GLOBAL 1
-#define STB_WEAK 2
-#define STB_LOOS 10
-#define STB_HIOS 12
-#define STB_MULTIDEF 13
-#define STB_LOPROC 13
-#define STB_HIPROC 15
-
-// Symbol types
-#define STT_NOTYPE 0
-#define STT_OBJECT 1
-#define STT_FUNC 2
-#define STT_SECTION 3
-#define STT_FILE 4
-#define STT_COMMON 5
-#define STT_TLS 6
-#define STT_LOOS 10
-#define STT_HIOS 12
-#define STT_LOPROC 13
-#define STT_HIPROC 15
-
-// Symbol visibility
-#define STV_DEFAULT 0
-#define STV_INTERNAL 1
-#define STV_HIDDEN 2
-#define STV_PROTECTED 3
-
-// Undefined name
-#define STN_UNDEF 0
-
-// Relocation types
-#define R_386_NONE 0
-#define R_X86_64_NONE 0
-#define R_386_32 1
-#define R_X86_64_64 1
-#define R_386_PC32 2
-#define R_X86_64_PC32 2
-#define R_386_GOT32 3
-#define R_X86_64_GOT32 3
-#define R_386_PLT32 4
-#define R_X86_64_PLT32 4
-#define R_386_COPY 5
-#define R_X86_64_COPY 5
-#define R_386_GLOB_DAT 6
-#define R_X86_64_GLOB_DAT 6
-#define R_386_JMP_SLOT 7
-#define R_X86_64_JUMP_SLOT 7
-#define R_386_RELATIVE 8
-#define R_X86_64_RELATIVE 8
-#define R_386_GOTOFF 9
-#define R_X86_64_GOTPCREL 9
-#define R_386_GOTPC 10
-#define R_X86_64_32 10
-#define R_X86_64_32S 11
-#define R_X86_64_16 12
-#define R_X86_64_PC16 13
-#define R_X86_64_8 14
-#define R_X86_64_PC8 15
-#define R_X86_64_DTPMOD64 16
-#define R_X86_64_DTPOFF64 17
-#define R_X86_64_TPOFF64 18
-#define R_X86_64_TLSGD 19
-#define R_X86_64_TLSLD 20
-#define R_X86_64_DTPOFF32 21
-#define R_X86_64_GOTTPOFF 22
-#define R_X86_64_TPOFF32 23
-#define R_X86_64_PC64 24
-#define R_X86_64_GOTOFF64 25
-#define R_X86_64_GOTPC32 26
-#define R_X86_64_GOT64 27
-#define R_X86_64_GOTPCREL64 28
-#define R_X86_64_GOTPC64 29
-#define R_X86_64_GOTPLT64 30
-#define R_X86_64_PLTOFF64 31
-#define R_X86_64_GOTPC32_TLSDESC 34
-#define R_X86_64_TLSDESC_CALL 35
-#define R_X86_64_TLSDESC 36
-#define R_X86_64_IRELATIVE 37
-#define R_X86_64_GNU_VTINHERIT 250
-#define R_X86_64_GNU_VTENTRY 251
-
-// Segment types
-#define PT_NULL 0
-#define PT_LOAD 1
-#define PT_DYNAMIC 2
-#define PT_INTERP 3
-#define PT_NOTE 4
-#define PT_SHLIB 5
-#define PT_PHDR 6
-#define PT_TLS 7
-#define PT_LOOS 0x60000000
-#define PT_HIOS 0x6fffffff
-#define PT_LOPROC 0x70000000
-#define PT_HIPROC 0x7FFFFFFF
-
-// Segment flags
-#define PF_X 1 // Execute
-#define PF_W 2 // Write
-#define PF_R 4 // Read
-#define PF_MASKOS 0x0ff00000 // Unspecified
-#define PF_MASKPROC 0xf0000000 // Unspecified
-
-// Dynamic Array Tags
-#define DT_NULL 0
-#define DT_NEEDED 1
-#define DT_PLTRELSZ 2
-#define DT_PLTGOT 3
-#define DT_HASH 4
-#define DT_STRTAB 5
-#define DT_SYMTAB 6
-#define DT_RELA 7
-#define DT_RELASZ 8
-#define DT_RELAENT 9
-#define DT_STRSZ 10
-#define DT_SYMENT 11
-#define DT_INIT 12
-#define DT_FINI 13
-#define DT_SONAME 14
-#define DT_RPATH 15
-#define DT_SYMBOLIC 16
-#define DT_REL 17
-#define DT_RELSZ 18
-#define DT_RELENT 19
-#define DT_PLTREL 20
-#define DT_DEBUG 21
-#define DT_TEXTREL 22
-#define DT_JMPREL 23
-#define DT_BIND_NOW 24
-#define DT_INIT_ARRAY 25
-#define DT_FINI_ARRAY 26
-#define DT_INIT_ARRAYSZ 27
-#define DT_FINI_ARRAYSZ 28
-#define DT_RUNPATH 29
-#define DT_FLAGS 30
-#define DT_ENCODING 32
-#define DT_PREINIT_ARRAY 32
-#define DT_PREINIT_ARRAYSZ 33
-#define DT_MAXPOSTAGS 34
-#define DT_LOOS 0x6000000D
-#define DT_HIOS 0x6ffff000
-#define DT_LOPROC 0x70000000
-#define DT_HIPROC 0x7FFFFFFF
-
-// DT_FLAGS values
-#define DF_ORIGIN 0x1
-#define DF_SYMBOLIC 0x2
-#define DF_TEXTREL 0x4
-#define DF_BIND_NOW 0x8
-#define DF_STATIC_TLS 0x10
-
-
-// ELF file header
-struct Elf32_Ehdr {
- unsigned char e_ident[EI_NIDENT];
- Elf_Half e_type;
- Elf_Half e_machine;
- Elf_Word e_version;
- Elf32_Addr e_entry;
- Elf32_Off e_phoff;
- Elf32_Off e_shoff;
- Elf_Word e_flags;
- Elf_Half e_ehsize;
- Elf_Half e_phentsize;
- Elf_Half e_phnum;
- Elf_Half e_shentsize;
- Elf_Half e_shnum;
- Elf_Half e_shstrndx;
-};
-
-struct Elf64_Ehdr {
- unsigned char e_ident[EI_NIDENT];
- Elf_Half e_type;
- Elf_Half e_machine;
- Elf_Word e_version;
- Elf64_Addr e_entry;
- Elf64_Off e_phoff;
- Elf64_Off e_shoff;
- Elf_Word e_flags;
- Elf_Half e_ehsize;
- Elf_Half e_phentsize;
- Elf_Half e_phnum;
- Elf_Half e_shentsize;
- Elf_Half e_shnum;
- Elf_Half e_shstrndx;
-};
-
-
-// Section header
-struct Elf32_Shdr {
- Elf_Word sh_name;
- Elf_Word sh_type;
- Elf_Word sh_flags;
- Elf32_Addr sh_addr;
- Elf32_Off sh_offset;
- Elf_Word sh_size;
- Elf_Word sh_link;
- Elf_Word sh_info;
- Elf_Word sh_addralign;
- Elf_Word sh_entsize;
-};
-
-struct Elf64_Shdr {
- Elf_Word sh_name;
- Elf_Word sh_type;
- Elf_Xword sh_flags;
- Elf64_Addr sh_addr;
- Elf64_Off sh_offset;
- Elf_Xword sh_size;
- Elf_Word sh_link;
- Elf_Word sh_info;
- Elf_Xword sh_addralign;
- Elf_Xword sh_entsize;
-};
-
-
-// Segment header
-struct Elf32_Phdr {
- Elf_Word p_type;
- Elf32_Off p_offset;
- Elf32_Addr p_vaddr;
- Elf32_Addr p_paddr;
- Elf_Word p_filesz;
- Elf_Word p_memsz;
- Elf_Word p_flags;
- Elf_Word p_align;
-};
-
-struct Elf64_Phdr {
- Elf_Word p_type;
- Elf_Word p_flags;
- Elf64_Off p_offset;
- Elf64_Addr p_vaddr;
- Elf64_Addr p_paddr;
- Elf_Xword p_filesz;
- Elf_Xword p_memsz;
- Elf_Xword p_align;
-};
-
-
-// Symbol table entry
-struct Elf32_Sym {
- Elf_Word st_name;
- Elf32_Addr st_value;
- Elf_Word st_size;
- unsigned char st_info;
- unsigned char st_other;
- Elf_Half st_shndx;
-};
-
-struct Elf64_Sym {
- Elf_Word st_name;
- unsigned char st_info;
- unsigned char st_other;
- Elf_Half st_shndx;
- Elf64_Addr st_value;
- Elf_Xword st_size;
-};
-
-
-#define ELF_ST_BIND(i) ((i) >> 4)
-#define ELF_ST_TYPE(i) ((i)&0xf)
-#define ELF_ST_INFO(b, t) (((b) << 4) + ((t)&0xf))
-
-#define ELF_ST_VISIBILITY(o) ((o)&0x3)
-
-
-// Relocation entries
-struct Elf32_Rel {
- Elf32_Addr r_offset;
- Elf_Word r_info;
-};
-
-struct Elf32_Rela {
- Elf32_Addr r_offset;
- Elf_Word r_info;
- Elf_Sword r_addend;
-};
-
-struct Elf64_Rel {
- Elf64_Addr r_offset;
- Elf_Xword r_info;
-};
-
-struct Elf64_Rela {
- Elf64_Addr r_offset;
- Elf_Xword r_info;
- Elf_Sxword r_addend;
-};
-
-
-#define ELF32_R_SYM(i) ((i) >> 8)
-#define ELF32_R_TYPE(i) ((unsigned char)(i))
-#define ELF32_R_INFO(s, t) (((s) << 8) + (unsigned char)(t))
-
-#define ELF64_R_SYM(i) ((i) >> 32)
-#define ELF64_R_TYPE(i) ((i)&0xffffffffL)
-#define ELF64_R_INFO(s, t) ((((int64_t)s) << 32) + ((t)&0xffffffffL))
-
-// Dynamic structure
-struct Elf32_Dyn {
- Elf_Sword d_tag;
- union {
- Elf_Word d_val;
- Elf32_Addr d_ptr;
- } d_un;
-};
-
-struct Elf64_Dyn {
- Elf_Sxword d_tag;
- union {
- Elf_Xword d_val;
- Elf64_Addr d_ptr;
- } d_un;
-};
-
-} // namespace ELFIO
-
-#endif // ELFTYPES_H
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio.hpp
deleted file mode 100644
index 6bc0418..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio.hpp
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_HPP
-#define ELFIO_HPP
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#pragma warning(disable : 4355)
-#pragma warning(disable : 4244)
-#endif
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-#include <vector>
-#include <deque>
-#include <iterator>
-#include <typeinfo>
-
-#include "elf_types.hpp"
-#include "elfio_utils.hpp"
-#include "elfio_header.hpp"
-#include "elfio_section.hpp"
-#include "elfio_segment.hpp"
-#include "elfio_strings.hpp"
-
-#define ELFIO_HEADER_ACCESS_GET(TYPE, FNAME) \
- TYPE get_##FNAME() const { return header->get_##FNAME(); }
-
-#define ELFIO_HEADER_ACCESS_GET_SET(TYPE, FNAME) \
- TYPE get_##FNAME() const { return header->get_##FNAME(); } \
- void set_##FNAME(TYPE val) { header->set_##FNAME(val); }
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class elfio {
- public:
- //------------------------------------------------------------------------------
- elfio() : sections(this), segments(this) {
- header = 0;
- current_file_pos = 0;
- create(ELFCLASS32, ELFDATA2LSB);
- }
-
- //------------------------------------------------------------------------------
- ~elfio() { clean(); }
-
- //------------------------------------------------------------------------------
- void create(unsigned char file_class, unsigned char encoding) {
- clean();
- convertor.setup(encoding);
- header = create_header(file_class, encoding);
- create_mandatory_sections();
- }
-
- //------------------------------------------------------------------------------
- bool load(const std::string& file_name) {
- std::ifstream stream;
- stream.open(file_name.c_str(), std::ios::in | std::ios::binary);
- if (!stream) {
- return false;
- }
-
- return load(stream);
- }
-
- //------------------------------------------------------------------------------
- bool load(std::istream& stream) {
- clean();
-
- unsigned char e_ident[EI_NIDENT];
-
- // Read ELF file signature
- stream.seekg(0);
- stream.read(reinterpret_cast<char*>(&e_ident), sizeof(e_ident));
-
- // Is it ELF file?
- if (stream.gcount() != sizeof(e_ident) || e_ident[EI_MAG0] != ELFMAG0 ||
- e_ident[EI_MAG1] != ELFMAG1 || e_ident[EI_MAG2] != ELFMAG2 ||
- e_ident[EI_MAG3] != ELFMAG3) {
- return false;
- }
-
- if ((e_ident[EI_CLASS] != ELFCLASS64) && (e_ident[EI_CLASS] != ELFCLASS32)) {
- return false;
- }
-
- convertor.setup(e_ident[EI_DATA]);
-
- header = create_header(e_ident[EI_CLASS], e_ident[EI_DATA]);
- if (0 == header) {
- return false;
- }
- if (!header->load(stream)) {
- return false;
- }
-
- load_sections(stream);
- load_segments(stream);
-
- return true;
- }
-
- //------------------------------------------------------------------------------
- bool save(const std::string& file_name) {
- std::ofstream f(file_name.c_str(), std::ios::out | std::ios::binary);
-
- if (!f) {
- return false;
- }
-
- bool is_still_good = true;
-
- // Define layout specific header fields
- // The position of the segment table is fixed after the header.
- // The position of the section table is variable and needs to be fixed
- // before saving.
- header->set_segments_num(segments.size());
- header->set_segments_offset(segments.size() ? header->get_header_size() : 0);
- header->set_sections_num(sections.size());
- header->set_sections_offset(0);
-
- // Layout the first section right after the segment table
- current_file_pos = header->get_header_size() +
- header->get_segment_entry_size() * header->get_segments_num();
-
- is_still_good = layout_segments_and_their_sections();
- is_still_good = is_still_good && layout_sections_without_segments();
- is_still_good = is_still_good && layout_section_table();
-
- is_still_good = is_still_good && save_header(f);
- is_still_good = is_still_good && save_sections(f);
- is_still_good = is_still_good && save_segments(f);
-
- f.close();
-
- return is_still_good;
- }
-
- //------------------------------------------------------------------------------
- // ELF header access functions
- ELFIO_HEADER_ACCESS_GET(unsigned char, class);
- ELFIO_HEADER_ACCESS_GET(unsigned char, elf_version);
- ELFIO_HEADER_ACCESS_GET(unsigned char, encoding);
- ELFIO_HEADER_ACCESS_GET(Elf_Word, version);
- ELFIO_HEADER_ACCESS_GET(Elf_Half, header_size);
- ELFIO_HEADER_ACCESS_GET(Elf_Half, section_entry_size);
- ELFIO_HEADER_ACCESS_GET(Elf_Half, segment_entry_size);
-
- ELFIO_HEADER_ACCESS_GET_SET(unsigned char, os_abi);
- ELFIO_HEADER_ACCESS_GET_SET(unsigned char, abi_version);
- ELFIO_HEADER_ACCESS_GET_SET(Elf_Half, type);
- ELFIO_HEADER_ACCESS_GET_SET(Elf_Half, machine);
- ELFIO_HEADER_ACCESS_GET_SET(Elf_Word, flags);
- ELFIO_HEADER_ACCESS_GET_SET(Elf64_Addr, entry);
- ELFIO_HEADER_ACCESS_GET_SET(Elf64_Off, sections_offset);
- ELFIO_HEADER_ACCESS_GET_SET(Elf64_Off, segments_offset);
- ELFIO_HEADER_ACCESS_GET_SET(Elf_Half, section_name_str_index);
-
- //------------------------------------------------------------------------------
- const endianess_convertor& get_convertor() const { return convertor; }
-
- //------------------------------------------------------------------------------
- Elf_Xword get_default_entry_size(Elf_Word section_type) const {
- switch (section_type) {
- case SHT_RELA:
- if (header->get_class() == ELFCLASS64) {
- return sizeof(Elf64_Rela);
- } else {
- return sizeof(Elf32_Rela);
- }
- case SHT_REL:
- if (header->get_class() == ELFCLASS64) {
- return sizeof(Elf64_Rel);
- } else {
- return sizeof(Elf32_Rel);
- }
- case SHT_SYMTAB:
- if (header->get_class() == ELFCLASS64) {
- return sizeof(Elf64_Sym);
- } else {
- return sizeof(Elf32_Sym);
- }
- case SHT_DYNAMIC:
- if (header->get_class() == ELFCLASS64) {
- return sizeof(Elf64_Dyn);
- } else {
- return sizeof(Elf32_Dyn);
- }
- default:
- return 0;
- }
- }
-
- //------------------------------------------------------------------------------
- private:
- //------------------------------------------------------------------------------
- void clean() {
- delete header;
- header = 0;
-
- std::vector<section*>::const_iterator it;
- for (it = sections_.begin(); it != sections_.end(); ++it) {
- delete *it;
- }
- sections_.clear();
-
- std::vector<segment*>::const_iterator it1;
- for (it1 = segments_.begin(); it1 != segments_.end(); ++it1) {
- delete *it1;
- }
- segments_.clear();
- }
-
- //------------------------------------------------------------------------------
- elf_header* create_header(unsigned char file_class, unsigned char encoding) {
- elf_header* new_header = 0;
-
- if (file_class == ELFCLASS64) {
- new_header = new elf_header_impl<Elf64_Ehdr>(&convertor, encoding);
- } else if (file_class == ELFCLASS32) {
- new_header = new elf_header_impl<Elf32_Ehdr>(&convertor, encoding);
- } else {
- return 0;
- }
-
- return new_header;
- }
-
- //------------------------------------------------------------------------------
- section* create_section() {
- section* new_section;
- unsigned char file_class = get_class();
-
- if (file_class == ELFCLASS64) {
- new_section = new section_impl<Elf64_Shdr>(&convertor);
- } else if (file_class == ELFCLASS32) {
- new_section = new section_impl<Elf32_Shdr>(&convertor);
- } else {
- return 0;
- }
-
- new_section->set_index((Elf_Half)sections_.size());
- sections_.push_back(new_section);
-
- return new_section;
- }
-
-
- //------------------------------------------------------------------------------
- segment* create_segment() {
- segment* new_segment;
- unsigned char file_class = header->get_class();
-
- if (file_class == ELFCLASS64) {
- new_segment = new segment_impl<Elf64_Phdr>(&convertor);
- } else if (file_class == ELFCLASS32) {
- new_segment = new segment_impl<Elf32_Phdr>(&convertor);
- } else {
- return 0;
- }
-
- new_segment->set_index((Elf_Half)segments_.size());
- segments_.push_back(new_segment);
-
- return new_segment;
- }
-
- //------------------------------------------------------------------------------
- void create_mandatory_sections() {
- // Create null section without calling to 'add_section' as no string
- // section containing section names exists yet
- section* sec0 = create_section();
- sec0->set_index(0);
- sec0->set_name("");
- sec0->set_name_string_offset(0);
-
- set_section_name_str_index(1);
- section* shstrtab = sections.add(".shstrtab");
- shstrtab->set_type(SHT_STRTAB);
- shstrtab->set_addr_align(1);
- }
-
- //------------------------------------------------------------------------------
- Elf_Half load_sections(std::istream& stream) {
- Elf_Half entry_size = header->get_section_entry_size();
- Elf_Half num = header->get_sections_num();
- Elf64_Off offset = header->get_sections_offset();
-
- for (Elf_Half i = 0; i < num; ++i) {
- section* sec = create_section();
- sec->load(stream, (std::streamoff)offset + i * entry_size);
- sec->set_index(i);
- // To mark that the section is not permitted to reassign address
- // during layout calculation
- sec->set_address(sec->get_address());
- }
-
- Elf_Half shstrndx = get_section_name_str_index();
-
- if (SHN_UNDEF != shstrndx) {
- string_section_accessor str_reader(sections[shstrndx]);
- for (Elf_Half i = 0; i < num; ++i) {
- Elf_Word offset = sections[i]->get_name_string_offset();
- const char* p = str_reader.get_string(offset);
- if (p != 0) {
- sections[i]->set_name(p);
- }
- }
- }
-
- return num;
- }
-
- //------------------------------------------------------------------------------
- bool load_segments(std::istream& stream) {
- Elf_Half entry_size = header->get_segment_entry_size();
- Elf_Half num = header->get_segments_num();
- Elf64_Off offset = header->get_segments_offset();
-
- for (Elf_Half i = 0; i < num; ++i) {
- segment* seg;
- unsigned char file_class = header->get_class();
-
- if (file_class == ELFCLASS64) {
- seg = new segment_impl<Elf64_Phdr>(&convertor);
- } else if (file_class == ELFCLASS32) {
- seg = new segment_impl<Elf32_Phdr>(&convertor);
- } else {
- return false;
- }
-
- seg->load(stream, (std::streamoff)offset + i * entry_size);
- seg->set_index(i);
-
- // Add sections to the segments (similar to readelfs algorithm)
- Elf64_Off segBaseOffset = seg->get_offset();
- Elf64_Off segEndOffset = segBaseOffset + seg->get_file_size();
- Elf64_Off segVBaseAddr = seg->get_virtual_address();
- Elf64_Off segVEndAddr = segVBaseAddr + seg->get_memory_size();
- for (Elf_Half j = 0; j < sections.size(); ++j) {
- const section* psec = sections[j];
-
- // SHF_ALLOC sections are matched based on the virtual address
- // otherwise the file offset is matched
- if (psec->get_flags() & SHF_ALLOC
- ? (segVBaseAddr <= psec->get_address() &&
- psec->get_address() + psec->get_size() <= segVEndAddr)
- : (segBaseOffset <= psec->get_offset() &&
- psec->get_offset() + psec->get_size() <= segEndOffset)) {
- seg->add_section_index(psec->get_index(), psec->get_addr_align());
- }
- }
-
- // Add section into the segments' container
- segments_.push_back(seg);
- }
-
- return true;
- }
-
- //------------------------------------------------------------------------------
- bool save_header(std::ofstream& f) { return header->save(f); }
-
- //------------------------------------------------------------------------------
- bool save_sections(std::ofstream& f) {
- for (unsigned int i = 0; i < sections_.size(); ++i) {
- section* sec = sections_.at(i);
-
- std::streampos headerPosition = (std::streamoff)header->get_sections_offset() +
- header->get_section_entry_size() * sec->get_index();
-
- sec->save(f, headerPosition, sec->get_offset());
- }
- return true;
- }
-
- //------------------------------------------------------------------------------
- bool save_segments(std::ofstream& f) {
- for (unsigned int i = 0; i < segments_.size(); ++i) {
- segment* seg = segments_.at(i);
-
- std::streampos headerPosition =
- header->get_segments_offset() + header->get_segment_entry_size() * seg->get_index();
-
- seg->save(f, headerPosition, seg->get_offset());
- }
- return true;
- }
-
- //------------------------------------------------------------------------------
- bool is_section_without_segment(unsigned int section_index) {
- bool found = false;
-
- for (unsigned int j = 0; !found && (j < segments.size()); ++j) {
- for (unsigned int k = 0; !found && (k < segments[j]->get_sections_num()); ++k) {
- found = segments[j]->get_section_index_at(k) == section_index;
- }
- }
-
- return !found;
- }
-
- //------------------------------------------------------------------------------
- bool is_subsequence_of(segment* seg1, segment* seg2) {
- // Return 'true' if sections of seg1 are a subset of sections in seg2
- const std::vector<Elf_Half>& sections1 = seg1->get_sections();
- const std::vector<Elf_Half>& sections2 = seg2->get_sections();
-
- bool found = false;
- if (sections1.size() < sections2.size()) {
- found = std::includes(sections2.begin(), sections2.end(), sections1.begin(),
- sections1.end());
- }
-
- return found;
- }
-
- //------------------------------------------------------------------------------
- std::vector<segment*> get_ordered_segments() {
- std::vector<segment*> res;
- std::deque<segment*> worklist;
-
- res.reserve(segments.size());
- std::copy(segments_.begin(), segments_.end(), std::back_inserter(worklist));
-
- // Bring the segments which start at address 0 to the front
- size_t nextSlot = 0;
- for (size_t i = 0; i < worklist.size(); ++i) {
- if (i != nextSlot && worklist[i]->is_offset_initialized() &&
- worklist[i]->get_offset() == 0) {
- std::swap(worklist[i], worklist[nextSlot]);
- ++nextSlot;
- }
- }
-
- while (!worklist.empty()) {
- segment* seg = worklist.front();
- worklist.pop_front();
-
- size_t i = 0;
- for (; i < worklist.size(); ++i) {
- if (is_subsequence_of(seg, worklist[i])) {
- break;
- }
- }
-
- if (i < worklist.size())
- worklist.push_back(seg);
- else
- res.push_back(seg);
- }
-
- return res;
- }
-
-
- //------------------------------------------------------------------------------
- bool layout_sections_without_segments() {
- for (unsigned int i = 0; i < sections_.size(); ++i) {
- if (is_section_without_segment(i)) {
- section* sec = sections_[i];
-
- Elf_Xword section_align = sec->get_addr_align();
- if (section_align > 1 && current_file_pos % section_align != 0) {
- current_file_pos += section_align - current_file_pos % section_align;
- }
-
- if (0 != sec->get_index()) sec->set_offset(current_file_pos);
-
- if (SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type()) {
- current_file_pos += sec->get_size();
- }
- }
- }
-
- return true;
- }
-
-
- //------------------------------------------------------------------------------
- bool layout_segments_and_their_sections() {
- std::vector<segment*> worklist;
- std::vector<bool> section_generated(sections.size(), false);
-
- // Get segments in a order in where segments which contain a
- // sub sequence of other segments are located at the end
- worklist = get_ordered_segments();
-
- for (unsigned int i = 0; i < worklist.size(); ++i) {
- Elf_Xword segment_memory = 0;
- Elf_Xword segment_filesize = 0;
- Elf_Xword seg_start_pos = current_file_pos;
- segment* seg = worklist[i];
-
- // Special case: PHDR segment
- // This segment contains the program headers but no sections
- if (seg->get_type() == PT_PHDR && seg->get_sections_num() == 0) {
- seg_start_pos = header->get_segments_offset();
- segment_memory = segment_filesize =
- header->get_segment_entry_size() * header->get_segments_num();
- }
- // Special case:
- // Segments which start with the NULL section and have further sections
- else if (seg->get_sections_num() > 1 &&
- sections[seg->get_section_index_at(0)]->get_type() == SHT_NULL) {
- seg_start_pos = 0;
- if (seg->get_sections_num()) {
- segment_memory = segment_filesize = current_file_pos;
- }
- }
- // New segments with not generated sections
- // have to be aligned
- else if (seg->get_sections_num() && !section_generated[seg->get_section_index_at(0)]) {
- Elf64_Off cur_page_alignment = current_file_pos % seg->get_align();
- Elf64_Off req_page_alignment = seg->get_virtual_address() % seg->get_align();
- Elf64_Off error = req_page_alignment - cur_page_alignment;
-
- current_file_pos += (seg->get_align() + error) % seg->get_align();
- seg_start_pos = current_file_pos;
- } else if (seg->get_sections_num()) {
- seg_start_pos = sections[seg->get_section_index_at(0)]->get_offset();
- }
-
- // Write segment's data
- for (unsigned int j = 0; j < seg->get_sections_num(); ++j) {
- Elf_Half index = seg->get_section_index_at(j);
-
- section* sec = sections[index];
-
- // The NULL section is always generated
- if (SHT_NULL == sec->get_type()) {
- section_generated[index] = true;
- continue;
- }
-
- Elf_Xword secAlign = 0;
- // Fix up the alignment
- if (!section_generated[index] && sec->is_address_initialized() &&
- SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type()) {
- // Align the sections based on the virtual addresses
- // when possible (this is what matters for execution)
- Elf64_Off req_offset = sec->get_address() - seg->get_virtual_address();
- Elf64_Off cur_offset = current_file_pos - seg_start_pos;
- secAlign = req_offset - cur_offset;
- } else if (!section_generated[index]) {
- // If no address has been specified then only the section
- // alignment constraint has to be matched
- Elf_Xword align = sec->get_addr_align();
- if (align == 0) {
- align = 1;
- }
- Elf64_Off error = current_file_pos % align;
- secAlign = (align - error) % align;
- } else {
- // Alignment for already generated sections
- secAlign = sec->get_offset() - seg_start_pos - segment_filesize;
- }
-
- // Determine the segment file and memory sizes
- // Special case .tbss section (NOBITS) in non TLS segment
- if ((sec->get_flags() & SHF_ALLOC) &&
- !((sec->get_flags() & SHF_TLS) && (seg->get_type() != PT_TLS) &&
- (SHT_NOBITS == sec->get_type())))
- segment_memory += sec->get_size() + secAlign;
- if (SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type())
- segment_filesize += sec->get_size() + secAlign;
-
- // Nothing to be done when generating nested segments
- if (section_generated[index]) {
- continue;
- }
-
- current_file_pos += secAlign;
-
- // Set the section addresses when missing
- if (!sec->is_address_initialized())
- sec->set_address(seg->get_virtual_address() + current_file_pos - seg_start_pos);
-
- if (0 != sec->get_index()) sec->set_offset(current_file_pos);
-
- if (SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type())
- current_file_pos += sec->get_size();
- section_generated[index] = true;
- }
-
- seg->set_file_size(segment_filesize);
- seg->set_memory_size(segment_memory);
- seg->set_offset(seg_start_pos);
- }
-
- return true;
- }
-
- //------------------------------------------------------------------------------
- bool layout_section_table() {
- // Simply place the section table at the end for now
- Elf64_Off alignmentError = current_file_pos % 4;
- current_file_pos += (4 - alignmentError) % 4;
- header->set_sections_offset(current_file_pos);
- return true;
- }
-
-
- //------------------------------------------------------------------------------
- public:
- friend class Sections;
- class Sections {
- public:
- //------------------------------------------------------------------------------
- Sections(elfio* parent_) : parent(parent_) {}
-
- //------------------------------------------------------------------------------
- Elf_Half size() const { return (Elf_Half)parent->sections_.size(); }
-
- //------------------------------------------------------------------------------
- section* operator[](unsigned int index) const {
- section* sec = 0;
-
- if (index < parent->sections_.size()) {
- sec = parent->sections_[index];
- }
-
- return sec;
- }
-
- //------------------------------------------------------------------------------
- section* operator[](const std::string& name) const {
- section* sec = 0;
-
- std::vector<section*>::const_iterator it;
- for (it = parent->sections_.begin(); it != parent->sections_.end(); ++it) {
- if ((*it)->get_name() == name) {
- sec = *it;
- break;
- }
- }
-
- return sec;
- }
-
- //------------------------------------------------------------------------------
- section* add(const std::string& name) {
- section* new_section = parent->create_section();
- new_section->set_name(name);
-
- Elf_Half str_index = parent->get_section_name_str_index();
- section* string_table(parent->sections_[str_index]);
- string_section_accessor str_writer(string_table);
- Elf_Word pos = str_writer.add_string(name);
- new_section->set_name_string_offset(pos);
-
- return new_section;
- }
-
- //------------------------------------------------------------------------------
- std::vector<section*>::iterator begin() { return parent->sections_.begin(); }
-
- //------------------------------------------------------------------------------
- std::vector<section*>::iterator end() { return parent->sections_.end(); }
-
- //------------------------------------------------------------------------------
- private:
- elfio* parent;
- } sections;
-
- //------------------------------------------------------------------------------
- public:
- friend class Segments;
- class Segments {
- public:
- //------------------------------------------------------------------------------
- Segments(elfio* parent_) : parent(parent_) {}
-
- //------------------------------------------------------------------------------
- Elf_Half size() const { return (Elf_Half)parent->segments_.size(); }
-
- //------------------------------------------------------------------------------
- segment* operator[](unsigned int index) const { return parent->segments_[index]; }
-
-
- //------------------------------------------------------------------------------
- segment* add() { return parent->create_segment(); }
-
- //------------------------------------------------------------------------------
- std::vector<segment*>::iterator begin() { return parent->segments_.begin(); }
-
- //------------------------------------------------------------------------------
- std::vector<segment*>::iterator end() { return parent->segments_.end(); }
-
- //------------------------------------------------------------------------------
- private:
- elfio* parent;
- } segments;
-
- //------------------------------------------------------------------------------
- private:
- elf_header* header;
- std::vector<section*> sections_;
- std::vector<segment*> segments_;
- endianess_convertor convertor;
-
- Elf_Xword current_file_pos;
-};
-
-} // namespace ELFIO
-
-#include "elfio_symbols.hpp"
-#include "elfio_note.hpp"
-#include "elfio_relocation.hpp"
-#include "elfio_dynamic.hpp"
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif // ELFIO_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dump.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dump.hpp
deleted file mode 100644
index c40a010..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dump.hpp
+++ /dev/null
@@ -1,825 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_DUMP_HPP
-#define ELFIO_DUMP_HPP
-
-#include <algorithm>
-#include <string>
-#include <ostream>
-#include <sstream>
-#include <iomanip>
-#include "elfio.hpp"
-
-namespace ELFIO {
-
-
-static struct class_table_t {
- const char key;
- const char* str;
-} class_table[] = {
- {ELFCLASS32, "ELF32"},
- {ELFCLASS64, "ELF64"},
-};
-
-
-static struct endian_table_t {
- const char key;
- const char* str;
-} endian_table[] = {
- {ELFDATANONE, "None"},
- {ELFDATA2LSB, "Little endian"},
- {ELFDATA2MSB, "Big endian"},
-};
-
-
-static struct version_table_t {
- const Elf64_Word key;
- const char* str;
-} version_table[] = {
- {EV_NONE, "None"},
- {EV_CURRENT, "Current"},
-};
-
-
-static struct type_table_t {
- const Elf32_Half key;
- const char* str;
-} type_table[] = {
- {ET_NONE, "No file type"}, {ET_REL, "Relocatable file"}, {ET_EXEC, "Executable file"},
- {ET_DYN, "Shared object file"}, {ET_CORE, "Core file"},
-};
-
-
-static struct machine_table_t {
- const Elf64_Half key;
- const char* str;
-} machine_table[] = {
- {EM_NONE, "No machine"},
- {EM_M32, "AT&T WE 32100"},
- {EM_SPARC, "SUN SPARC"},
- {EM_386, "Intel 80386"},
- {EM_68K, "Motorola m68k family"},
- {EM_88K, "Motorola m88k family"},
- {EM_486, "Intel 80486// Reserved for future use"},
- {EM_860, "Intel 80860"},
- {EM_MIPS, "MIPS R3000 (officially, big-endian only)"},
- {EM_S370, "IBM System/370"},
- {EM_MIPS_RS3_LE, "MIPS R3000 little-endian (Oct 4 1999 Draft) Deprecated"},
- {EM_res011, "Reserved"},
- {EM_res012, "Reserved"},
- {EM_res013, "Reserved"},
- {EM_res014, "Reserved"},
- {EM_PARISC, "HPPA"},
- {EM_res016, "Reserved"},
- {EM_VPP550, "Fujitsu VPP500"},
- {EM_SPARC32PLUS, "Sun's v8plus"},
- {EM_960, "Intel 80960"},
- {EM_PPC, "PowerPC"},
- {EM_PPC64, "64-bit PowerPC"},
- {EM_S390, "IBM S/390"},
- {EM_SPU, "Sony/Toshiba/IBM SPU"},
- {EM_res024, "Reserved"},
- {EM_res025, "Reserved"},
- {EM_res026, "Reserved"},
- {EM_res027, "Reserved"},
- {EM_res028, "Reserved"},
- {EM_res029, "Reserved"},
- {EM_res030, "Reserved"},
- {EM_res031, "Reserved"},
- {EM_res032, "Reserved"},
- {EM_res033, "Reserved"},
- {EM_res034, "Reserved"},
- {EM_res035, "Reserved"},
- {EM_V800, "NEC V800 series"},
- {EM_FR20, "Fujitsu FR20"},
- {EM_RH32, "TRW RH32"},
- {EM_MCORE, "Motorola M*Core // May also be taken by Fujitsu MMA"},
- {EM_RCE, "Old name for MCore"},
- {EM_ARM, "ARM"},
- {EM_OLD_ALPHA, "Digital Alpha"},
- {EM_SH, "Renesas (formerly Hitachi) / SuperH SH"},
- {EM_SPARCV9, "SPARC v9 64-bit"},
- {EM_TRICORE, "Siemens Tricore embedded processor"},
- {EM_ARC, "ARC Cores"},
- {EM_H8_300, "Renesas (formerly Hitachi) H8/300"},
- {EM_H8_300H, "Renesas (formerly Hitachi) H8/300H"},
- {EM_H8S, "Renesas (formerly Hitachi) H8S"},
- {EM_H8_500, "Renesas (formerly Hitachi) H8/500"},
- {EM_IA_64, "Intel IA-64 Processor"},
- {EM_MIPS_X, "Stanford MIPS-X"},
- {EM_COLDFIRE, "Motorola Coldfire"},
- {EM_68HC12, "Motorola M68HC12"},
- {EM_MMA, "Fujitsu Multimedia Accelerator"},
- {EM_PCP, "Siemens PCP"},
- {EM_NCPU, "Sony nCPU embedded RISC processor"},
- {EM_NDR1, "Denso NDR1 microprocesspr"},
- {EM_STARCORE, "Motorola Star*Core processor"},
- {EM_ME16, "Toyota ME16 processor"},
- {EM_ST100, "STMicroelectronics ST100 processor"},
- {EM_TINYJ, "Advanced Logic Corp. TinyJ embedded processor"},
- {EM_X86_64, "Advanced Micro Devices X86-64 processor"},
- {EM_PDSP, "Sony DSP Processor"},
- {EM_PDP10, "Digital Equipment Corp. PDP-10"},
- {EM_PDP11, "Digital Equipment Corp. PDP-11"},
- {EM_FX66, "Siemens FX66 microcontroller"},
- {EM_ST9PLUS, "STMicroelectronics ST9+ 8/16 bit microcontroller"},
- {EM_ST7, "STMicroelectronics ST7 8-bit microcontroller"},
- {EM_68HC16, "Motorola MC68HC16 Microcontroller"},
- {EM_68HC11, "Motorola MC68HC11 Microcontroller"},
- {EM_68HC08, "Motorola MC68HC08 Microcontroller"},
- {EM_68HC05, "Motorola MC68HC05 Microcontroller"},
- {EM_SVX, "Silicon Graphics SVx"},
- {EM_ST19, "STMicroelectronics ST19 8-bit cpu"},
- {EM_VAX, "Digital VAX"},
- {EM_CRIS, "Axis Communications 32-bit embedded processor"},
- {EM_JAVELIN, "Infineon Technologies 32-bit embedded cpu"},
- {EM_FIREPATH, "Element 14 64-bit DSP processor"},
- {EM_ZSP, "LSI Logic's 16-bit DSP processor"},
- {EM_MMIX, "Donald Knuth's educational 64-bit processor"},
- {EM_HUANY, "Harvard's machine-independent format"},
- {EM_PRISM, "SiTera Prism"},
- {EM_AVR, "Atmel AVR 8-bit microcontroller"},
- {EM_FR30, "Fujitsu FR30"},
- {EM_D10V, "Mitsubishi D10V"},
- {EM_D30V, "Mitsubishi D30V"},
- {EM_V850, "NEC v850"},
- {EM_M32R, "Renesas M32R (formerly Mitsubishi M32R)"},
- {EM_MN10300, "Matsushita MN10300"},
- {EM_MN10200, "Matsushita MN10200"},
- {EM_PJ, "picoJava"},
- {EM_OPENRISC, "OpenRISC 32-bit embedded processor"},
- {EM_ARC_A5, "ARC Cores Tangent-A5"},
- {EM_XTENSA, "Tensilica Xtensa Architecture"},
- {EM_VIDEOCORE, "Alphamosaic VideoCore processor"},
- {EM_TMM_GPP, "Thompson Multimedia General Purpose Processor"},
- {EM_NS32K, "National Semiconductor 32000 series"},
- {EM_TPC, "Tenor Network TPC processor"},
- {EM_SNP1K, "Trebia SNP 1000 processor"},
- {EM_ST200, "STMicroelectronics ST200 microcontroller"},
- {EM_IP2K, "Ubicom IP2022 micro controller"},
- {EM_MAX, "MAX Processor"},
- {EM_CR, "National Semiconductor CompactRISC"},
- {EM_F2MC16, "Fujitsu F2MC16"},
- {EM_MSP430, "TI msp430 micro controller"},
- {EM_BLACKFIN, "ADI Blackfin"},
- {EM_SE_C33, "S1C33 Family of Seiko Epson processors"},
- {EM_SEP, "Sharp embedded microprocessor"},
- {EM_ARCA, "Arca RISC Microprocessor"},
- {EM_UNICORE, "Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University"},
- {EM_EXCESS, "eXcess: 16/32/64-bit configurable embedded CPU"},
- {EM_DXP, "Icera Semiconductor Inc. Deep Execution Processor"},
- {EM_ALTERA_NIOS2, "Altera Nios II soft-core processor"},
- {EM_CRX, "National Semiconductor CRX"},
- {EM_XGATE, "Motorola XGATE embedded processor"},
- {EM_C166, "Infineon C16x/XC16x processor"},
- {EM_M16C, "Renesas M16C series microprocessors"},
- {EM_DSPIC30F, "Microchip Technology dsPIC30F Digital Signal Controller"},
- {EM_CE, "Freescale Communication Engine RISC core"},
- {EM_M32C, "Renesas M32C series microprocessors"},
- {EM_res121, "Reserved"},
- {EM_res122, "Reserved"},
- {EM_res123, "Reserved"},
- {EM_res124, "Reserved"},
- {EM_res125, "Reserved"},
- {EM_res126, "Reserved"},
- {EM_res127, "Reserved"},
- {EM_res128, "Reserved"},
- {EM_res129, "Reserved"},
- {EM_res130, "Reserved"},
- {EM_TSK3000, "Altium TSK3000 core"},
- {EM_RS08, "Freescale RS08 embedded processor"},
- {EM_res133, "Reserved"},
- {EM_ECOG2, "Cyan Technology eCOG2 microprocessor"},
- {EM_SCORE, "Sunplus Score"},
- {EM_SCORE7, "Sunplus S+core7 RISC processor"},
- {EM_DSP24, "New Japan Radio (NJR) 24-bit DSP Processor"},
- {EM_VIDEOCORE3, "Broadcom VideoCore III processor"},
- {EM_LATTICEMICO32, "RISC processor for Lattice FPGA architecture"},
- {EM_SE_C17, "Seiko Epson C17 family"},
- {EM_TI_C6000, "Texas Instruments TMS320C6000 DSP family"},
- {EM_TI_C2000, "Texas Instruments TMS320C2000 DSP family"},
- {EM_TI_C5500, "Texas Instruments TMS320C55x DSP family"},
- {EM_res143, "Reserved"},
- {EM_res144, "Reserved"},
- {EM_res145, "Reserved"},
- {EM_res146, "Reserved"},
- {EM_res147, "Reserved"},
- {EM_res148, "Reserved"},
- {EM_res149, "Reserved"},
- {EM_res150, "Reserved"},
- {EM_res151, "Reserved"},
- {EM_res152, "Reserved"},
- {EM_res153, "Reserved"},
- {EM_res154, "Reserved"},
- {EM_res155, "Reserved"},
- {EM_res156, "Reserved"},
- {EM_res157, "Reserved"},
- {EM_res158, "Reserved"},
- {EM_res159, "Reserved"},
- {EM_MMDSP_PLUS, "STMicroelectronics 64bit VLIW Data Signal Processor"},
- {EM_CYPRESS_M8C, "Cypress M8C microprocessor"},
- {EM_R32C, "Renesas R32C series microprocessors"},
- {EM_TRIMEDIA, "NXP Semiconductors TriMedia architecture family"},
- {EM_QDSP6, "QUALCOMM DSP6 Processor"},
- {EM_8051, "Intel 8051 and variants"},
- {EM_STXP7X, "STMicroelectronics STxP7x family"},
- {EM_NDS32, "Andes Technology compact code size embedded RISC processor family"},
- {EM_ECOG1, "Cyan Technology eCOG1X family"},
- {EM_ECOG1X, "Cyan Technology eCOG1X family"},
- {EM_MAXQ30, "Dallas Semiconductor MAXQ30 Core Micro-controllers"},
- {EM_XIMO16, "New Japan Radio (NJR) 16-bit DSP Processor"},
- {EM_MANIK, "M2000 Reconfigurable RISC Microprocessor"},
- {EM_CRAYNV2, "Cray Inc. NV2 vector architecture"},
- {EM_RX, "Renesas RX family"},
- {EM_METAG, "Imagination Technologies META processor architecture"},
- {EM_MCST_ELBRUS, "MCST Elbrus general purpose hardware architecture"},
- {EM_ECOG16, "Cyan Technology eCOG16 family"},
- {EM_CR16, "National Semiconductor CompactRISC 16-bit processor"},
- {EM_ETPU, "Freescale Extended Time Processing Unit"},
- {EM_SLE9X, "Infineon Technologies SLE9X core"},
- {EM_L1OM, "Intel L1OM"},
- {EM_INTEL181, "Reserved by Intel"},
- {EM_INTEL182, "Reserved by Intel"},
- {EM_res183, "Reserved by ARM"},
- {EM_res184, "Reserved by ARM"},
- {EM_AVR32, "Atmel Corporation 32-bit microprocessor family"},
- {EM_STM8, "STMicroeletronics STM8 8-bit microcontroller"},
- {EM_TILE64, "Tilera TILE64 multicore architecture family"},
- {EM_TILEPRO, "Tilera TILEPro multicore architecture family"},
- {EM_MICROBLAZE, "Xilinx MicroBlaze 32-bit RISC soft processor core"},
- {EM_CUDA, "NVIDIA CUDA architecture "},
-};
-
-
-static struct section_type_table_t {
- const Elf64_Half key;
- const char* str;
-} section_type_table[] = {
- {SHT_NULL, "NULL"},
- {SHT_PROGBITS, "PROGBITS"},
- {SHT_SYMTAB, "SYMTAB"},
- {SHT_STRTAB, "STRTAB"},
- {SHT_RELA, "RELA"},
- {SHT_HASH, "HASH"},
- {SHT_DYNAMIC, "DYNAMIC"},
- {SHT_NOTE, "NOTE"},
- {SHT_NOBITS, "NOBITS"},
- {SHT_REL, "REL"},
- {SHT_SHLIB, "SHLIB"},
- {SHT_DYNSYM, "DYNSYM"},
- {SHT_INIT_ARRAY, "INIT_ARRAY"},
- {SHT_FINI_ARRAY, "FINI_ARRAY"},
- {SHT_PREINIT_ARRAY, "PREINIT_ARRAY"},
- {SHT_GROUP, "GROUP"},
- {SHT_SYMTAB_SHNDX, "SYMTAB_SHNDX "},
-};
-
-
-static struct segment_type_table_t {
- const Elf_Word key;
- const char* str;
-} segment_type_table[] = {
- {PT_NULL, "NULL"}, {PT_LOAD, "LOAD"}, {PT_DYNAMIC, "DYNAMIC"}, {PT_INTERP, "INTERP"},
- {PT_NOTE, "NOTE"}, {PT_SHLIB, "SHLIB"}, {PT_PHDR, "PHDR"}, {PT_TLS, "TLS"},
-};
-
-
-static struct segment_flag_table_t {
- const Elf_Word key;
- const char* str;
-} segment_flag_table[] = {
- {0, ""}, {1, "X"}, {2, "W"}, {3, "WX"}, {4, "R"}, {5, "RX"}, {6, "RW"}, {7, "RWX"},
-};
-
-
-static struct symbol_bind_t {
- const Elf_Word key;
- const char* str;
-} symbol_bind_table[] = {
- {STB_LOCAL, "LOCAL"}, {STB_GLOBAL, "GLOBAL"}, {STB_WEAK, "WEAK"},
- {STB_LOOS, "LOOS"}, {STB_HIOS, "HIOS"}, {STB_MULTIDEF, "MULTIDEF"},
- {STB_LOPROC, "LOPROC"}, {STB_HIPROC, "HIPROC"},
-};
-
-
-static struct symbol_type_t {
- const Elf_Word key;
- const char* str;
-} symbol_type_table[] = {
- {STT_NOTYPE, "NOTYPE"}, {STT_OBJECT, "OBJECT"}, {STT_FUNC, "FUNC"},
- {STT_SECTION, "SECTION"}, {STT_FILE, "FILE"}, {STT_COMMON, "COMMON"},
- {STT_TLS, "TLS"}, {STT_LOOS, "LOOS"}, {STT_HIOS, "HIOS"},
- {STT_LOPROC, "LOPROC"}, {STT_HIPROC, "HIPROC"},
-};
-
-
-static struct dynamic_tag_t {
- const Elf_Word key;
- const char* str;
-} dynamic_tag_table[] = {
- {DT_NULL, "NULL"},
- {DT_NEEDED, "NEEDED"},
- {DT_PLTRELSZ, "PLTRELSZ"},
- {DT_PLTGOT, "PLTGOT"},
- {DT_HASH, "HASH"},
- {DT_STRTAB, "STRTAB"},
- {DT_SYMTAB, "SYMTAB"},
- {DT_RELA, "RELA"},
- {DT_RELASZ, "RELASZ"},
- {DT_RELAENT, "RELAENT"},
- {DT_STRSZ, "STRSZ"},
- {DT_SYMENT, "SYMENT"},
- {DT_INIT, "INIT"},
- {DT_FINI, "FINI"},
- {DT_SONAME, "SONAME"},
- {DT_RPATH, "RPATH"},
- {DT_SYMBOLIC, "SYMBOLIC"},
- {DT_REL, "REL"},
- {DT_RELSZ, "RELSZ"},
- {DT_RELENT, "RELENT"},
- {DT_PLTREL, "PLTREL"},
- {DT_DEBUG, "DEBUG"},
- {DT_TEXTREL, "TEXTREL"},
- {DT_JMPREL, "JMPREL"},
- {DT_BIND_NOW, "BIND_NOW"},
- {DT_INIT_ARRAY, "INIT_ARRAY"},
- {DT_FINI_ARRAY, "FINI_ARRAY"},
- {DT_INIT_ARRAYSZ, "INIT_ARRAYSZ"},
- {DT_FINI_ARRAYSZ, "FINI_ARRAYSZ"},
- {DT_RUNPATH, "RUNPATH"},
- {DT_FLAGS, "FLAGS"},
- {DT_ENCODING, "ENCODING"},
- {DT_PREINIT_ARRAY, "PREINIT_ARRAY"},
- {DT_PREINIT_ARRAYSZ, "PREINIT_ARRAYSZ"},
- {DT_MAXPOSTAGS, "MAXPOSTAGS"},
-};
-
-static const ELFIO::Elf_Xword MAX_DATA_ENTRIES = 64;
-
-//------------------------------------------------------------------------------
-class dump {
-#define DUMP_DEC_FORMAT(width) std::setw(width) << std::setfill(' ') << std::dec << std::right
-#define DUMP_HEX_FORMAT(width) std::setw(width) << std::setfill('0') << std::hex << std::right
-#define DUMP_STR_FORMAT(width) std::setw(width) << std::setfill(' ') << std::hex << std::left
-
- public:
- //------------------------------------------------------------------------------
- static void header(std::ostream& out, const elfio& reader) {
- out << "ELF Header" << std::endl
- << std::endl
- << " Class: " << str_class(reader.get_class()) << std::endl
- << " Encoding: " << str_endian(reader.get_encoding()) << std::endl
- << " ELFVersion: " << str_version(reader.get_elf_version()) << std::endl
- << " Type: " << str_type(reader.get_type()) << std::endl
- << " Machine: " << str_machine(reader.get_machine()) << std::endl
- << " Version: " << str_version(reader.get_version()) << std::endl
- << " Entry: "
- << "0x" << std::hex << reader.get_entry() << std::endl
- << " Flags: "
- << "0x" << std::hex << reader.get_flags() << std::endl
- << std::endl;
- }
-
- //------------------------------------------------------------------------------
- static void section_headers(std::ostream& out, const elfio& reader) {
- Elf_Half n = reader.sections.size();
-
- if (n == 0) {
- return;
- }
-
- out << "Section Headers:" << std::endl;
- if (reader.get_class() == ELFCLASS32) { // Output for 32-bit
- out << "[ Nr ] Type Addr Size ES Flg Lk Inf Al Name" << std::endl;
- } else { // Output for 64-bit
- out << "[ Nr ] Type Addr Size ES Flg"
- << std::endl
- << " Lk Inf Al Name" << std::endl;
- }
-
- for (Elf_Half i = 0; i < n; ++i) { // For all sections
- section* sec = reader.sections[i];
- section_header(out, i, sec, reader.get_class());
- }
-
- out << "Key to Flags: W (write), A (alloc), X (execute)\n\n" << std::endl;
- }
-
- //------------------------------------------------------------------------------
- static void section_header(std::ostream& out, Elf_Half no, const section* sec,
- unsigned char elf_class) {
- std::ios_base::fmtflags original_flags = out.flags();
-
- if (elf_class == ELFCLASS32) { // Output for 32-bit
- out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(17)
- << str_section_type(sec->get_type()) << " " << DUMP_HEX_FORMAT(8)
- << sec->get_address() << " " << DUMP_HEX_FORMAT(8) << sec->get_size() << " "
- << DUMP_HEX_FORMAT(2) << sec->get_entry_size() << " " << DUMP_STR_FORMAT(3)
- << section_flags(sec->get_flags()) << " " << DUMP_HEX_FORMAT(2) << sec->get_link()
- << " " << DUMP_HEX_FORMAT(3) << sec->get_info() << " " << DUMP_HEX_FORMAT(2)
- << sec->get_addr_align() << " " << DUMP_STR_FORMAT(17) << sec->get_name() << " "
- << std::endl;
- } else { // Output for 64-bit
- out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(17)
- << str_section_type(sec->get_type()) << " " << DUMP_HEX_FORMAT(16)
- << sec->get_address() << " " << DUMP_HEX_FORMAT(16) << sec->get_size() << " "
- << DUMP_HEX_FORMAT(4) << sec->get_entry_size() << " " << DUMP_STR_FORMAT(3)
- << section_flags(sec->get_flags()) << " " << std::endl
- << " " << DUMP_HEX_FORMAT(4) << sec->get_link() << " " << DUMP_HEX_FORMAT(4)
- << sec->get_info() << " " << DUMP_HEX_FORMAT(4) << sec->get_addr_align() << " "
- << DUMP_STR_FORMAT(17) << sec->get_name() << " " << std::endl;
- }
-
- out.flags(original_flags);
-
- return;
- }
-
- //------------------------------------------------------------------------------
- static void segment_headers(std::ostream& out, const elfio& reader) {
- Elf_Half n = reader.segments.size();
- if (n == 0) {
- return;
- }
-
- out << "Segment headers:" << std::endl;
- if (reader.get_class() == ELFCLASS32) { // Output for 32-bit
- out << "[ Nr ] Type VirtAddr PhysAddr FileSize Mem.Size Flags Align"
- << std::endl;
- } else { // Output for 64-bit
- out << "[ Nr ] Type VirtAddr PhysAddr Flags" << std::endl
- << " FileSize Mem.Size Align" << std::endl;
- }
-
- for (Elf_Half i = 0; i < n; ++i) {
- segment* seg = reader.segments[i];
- segment_header(out, i, seg, reader.get_class());
- }
-
- out << std::endl;
- }
-
- //------------------------------------------------------------------------------
- static void segment_header(std::ostream& out, Elf_Half no, const segment* seg,
- unsigned int elf_class) {
- std::ios_base::fmtflags original_flags = out.flags();
-
- if (elf_class == ELFCLASS32) { // Output for 32-bit
- out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(14)
- << str_segment_type(seg->get_type()) << " " << DUMP_HEX_FORMAT(8)
- << seg->get_virtual_address() << " " << DUMP_HEX_FORMAT(8)
- << seg->get_physical_address() << " " << DUMP_HEX_FORMAT(8) << seg->get_file_size()
- << " " << DUMP_HEX_FORMAT(8) << seg->get_memory_size() << " " << DUMP_STR_FORMAT(8)
- << str_segment_flag(seg->get_flags()) << " " << DUMP_HEX_FORMAT(8)
- << seg->get_align() << " " << std::endl;
- } else { // Output for 64-bit
- out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(14)
- << str_segment_type(seg->get_type()) << " " << DUMP_HEX_FORMAT(16)
- << seg->get_virtual_address() << " " << DUMP_HEX_FORMAT(16)
- << seg->get_physical_address() << " " << DUMP_STR_FORMAT(16)
- << str_segment_flag(seg->get_flags()) << " " << std::endl
- << " " << DUMP_HEX_FORMAT(16) << seg->get_file_size() << " "
- << DUMP_HEX_FORMAT(16) << seg->get_memory_size() << " " << DUMP_HEX_FORMAT(16)
- << seg->get_align() << " " << std::endl;
- }
-
- out.flags(original_flags);
- }
-
- //------------------------------------------------------------------------------
- static void symbol_tables(std::ostream& out, const elfio& reader) {
- Elf_Half n = reader.sections.size();
- for (Elf_Half i = 0; i < n; ++i) { // For all sections
- section* sec = reader.sections[i];
- if (SHT_SYMTAB == sec->get_type() || SHT_DYNSYM == sec->get_type()) {
- symbol_section_accessor symbols(reader, sec);
-
- Elf_Xword sym_no = symbols.get_symbols_num();
- if (sym_no > 0) {
- out << "Symbol table (" << sec->get_name() << ")" << std::endl;
- if (reader.get_class() == ELFCLASS32) { // Output for 32-bit
- out << "[ Nr ] Value Size Type Bind Sect Name" << std::endl;
- } else { // Output for 64-bit
- out << "[ Nr ] Value Size Type Bind Sect"
- << std::endl
- << " Name" << std::endl;
- }
- for (Elf_Half i = 0; i < sym_no; ++i) {
- std::string name;
- Elf64_Addr value = 0;
- Elf_Xword size = 0;
- unsigned char bind = 0;
- unsigned char type = 0;
- Elf_Half section = 0;
- unsigned char other = 0;
- symbols.get_symbol(i, name, value, size, bind, type, section, other);
- symbol_table(out, i, name, value, size, bind, type, section,
- reader.get_class());
- }
-
- out << std::endl;
- }
- }
- }
- }
-
- //------------------------------------------------------------------------------
- static void symbol_table(std::ostream& out, Elf_Half no, std::string& name, Elf64_Addr value,
- Elf_Xword size, unsigned char bind, unsigned char type,
- Elf_Half section, unsigned int elf_class) {
- std::ios_base::fmtflags original_flags = out.flags();
-
- if (elf_class == ELFCLASS32) { // Output for 32-bit
- out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_HEX_FORMAT(8) << value << " "
- << DUMP_HEX_FORMAT(8) << size << " " << DUMP_STR_FORMAT(7) << str_symbol_type(type)
- << " " << DUMP_STR_FORMAT(8) << str_symbol_bind(bind) << " " << DUMP_DEC_FORMAT(5)
- << section << " " << DUMP_STR_FORMAT(1) << name << " " << std::endl;
- } else { // Output for 64-bit
- out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_HEX_FORMAT(16) << value << " "
- << DUMP_HEX_FORMAT(16) << size << " " << DUMP_STR_FORMAT(7) << str_symbol_type(type)
- << " " << DUMP_STR_FORMAT(8) << str_symbol_bind(bind) << " " << DUMP_DEC_FORMAT(5)
- << section << " " << std::endl
- << " " << DUMP_STR_FORMAT(1) << name << " " << std::endl;
- }
-
- out.flags(original_flags);
- }
-
- //------------------------------------------------------------------------------
- static void notes(std::ostream& out, const elfio& reader) {
- Elf_Half no = reader.sections.size();
- for (Elf_Half i = 0; i < no; ++i) { // For all sections
- section* sec = reader.sections[i];
- if (SHT_NOTE == sec->get_type()) { // Look at notes
- note_section_accessor notes(reader, sec);
- int no_notes = notes.get_notes_num();
- if (no > 0) {
- out << "Note section (" << sec->get_name() << ")" << std::endl
- << " No Type Name" << std::endl;
- for (int j = 0; j < no_notes; ++j) { // For all notes
- Elf_Word type;
- std::string name;
- void* desc;
- Elf_Word descsz;
-
- if (notes.get_note(j, type, name, desc, descsz)) {
- // 'name' usually contains \0 at the end. Try to fix it
- name = name.c_str();
- note(out, j, type, name);
- }
- }
-
- out << std::endl;
- }
- }
- }
- }
-
- //------------------------------------------------------------------------------
- static void note(std::ostream& out, int no, Elf_Word type, const std::string& name) {
- out << " [" << DUMP_DEC_FORMAT(2) << no << "] " << DUMP_HEX_FORMAT(8) << type << " "
- << DUMP_STR_FORMAT(1) << name << std::endl;
- }
-
- //------------------------------------------------------------------------------
- static void dynamic_tags(std::ostream& out, const elfio& reader) {
- Elf_Half n = reader.sections.size();
- for (Elf_Half i = 0; i < n; ++i) { // For all sections
- section* sec = reader.sections[i];
- if (SHT_DYNAMIC == sec->get_type()) {
- dynamic_section_accessor dynamic(reader, sec);
-
- Elf_Xword dyn_no = dynamic.get_entries_num();
- if (dyn_no > 0) {
- out << "Dynamic section (" << sec->get_name() << ")" << std::endl;
- out << "[ Nr ] Tag Name/Value" << std::endl;
- for (int i = 0; i < dyn_no; ++i) {
- Elf_Xword tag = 0;
- Elf_Xword value = 0;
- std::string str;
- dynamic.get_entry(i, tag, value, str);
- dynamic_tag(out, i, tag, value, str, reader.get_class());
- if (DT_NULL == tag) {
- break;
- }
- }
-
- out << std::endl;
- }
- }
- }
- }
-
- //------------------------------------------------------------------------------
- static void dynamic_tag(std::ostream& out, int no, Elf_Xword tag, Elf_Xword value,
- std::string str, unsigned int /*elf_class*/) {
- out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(16)
- << str_dynamic_tag(tag) << " ";
- if (str.empty()) {
- out << DUMP_HEX_FORMAT(16) << value << " ";
- } else {
- out << DUMP_STR_FORMAT(32) << str << " ";
- }
- out << std::endl;
- }
-
- //------------------------------------------------------------------------------
- static void section_data(std::ostream& out, const section* sec) {
- std::ios_base::fmtflags original_flags = out.flags();
-
- out << sec->get_name() << std::endl;
- const char* pdata = sec->get_data();
- if (pdata) {
- ELFIO::Elf_Xword i;
- for (i = 0; i < std::min(sec->get_size(), MAX_DATA_ENTRIES); ++i) {
- if (i % 16 == 0) {
- out << "[" << DUMP_HEX_FORMAT(8) << i << "]";
- }
-
- out << " " << DUMP_HEX_FORMAT(2) << (pdata[i] & 0x000000FF);
-
- if (i % 16 == 15) {
- out << std::endl;
- }
- }
- if (i % 16 != 0) {
- out << std::endl;
- }
-
- out.flags(original_flags);
- }
-
- return;
- }
-
- //------------------------------------------------------------------------------
- static void section_datas(std::ostream& out, const elfio& reader) {
- Elf_Half n = reader.sections.size();
-
- if (n == 0) {
- return;
- }
-
- out << "Section Data:" << std::endl;
-
- for (Elf_Half i = 1; i < n; ++i) { // For all sections
- section* sec = reader.sections[i];
- if (sec->get_type() == SHT_NOBITS) {
- continue;
- }
- section_data(out, sec);
- }
-
- out << std::endl;
- }
-
- //------------------------------------------------------------------------------
- static void segment_data(std::ostream& out, Elf_Half no, const segment* seg) {
- std::ios_base::fmtflags original_flags = out.flags();
-
- out << "Segment # " << no << std::endl;
- const char* pdata = seg->get_data();
- if (pdata) {
- ELFIO::Elf_Xword i;
- for (i = 0; i < std::min(seg->get_file_size(), MAX_DATA_ENTRIES); ++i) {
- if (i % 16 == 0) {
- out << "[" << DUMP_HEX_FORMAT(8) << i << "]";
- }
-
- out << " " << DUMP_HEX_FORMAT(2) << (pdata[i] & 0x000000FF);
-
- if (i % 16 == 15) {
- out << std::endl;
- }
- }
- if (i % 16 != 0) {
- out << std::endl;
- }
-
- out.flags(original_flags);
- }
-
- return;
- }
-
- //------------------------------------------------------------------------------
- static void segment_datas(std::ostream& out, const elfio& reader) {
- Elf_Half n = reader.segments.size();
-
- if (n == 0) {
- return;
- }
-
- out << "Segment Data:" << std::endl;
-
- for (Elf_Half i = 0; i < n; ++i) { // For all sections
- segment* seg = reader.segments[i];
- segment_data(out, i, seg);
- }
-
- out << std::endl;
- }
-
- private:
- //------------------------------------------------------------------------------
- template <typename T, typename K>
- std::string static find_value_in_table(const T& table, const K& key) {
- std::string res = "?";
- for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); ++i) {
- if (table[i].key == key) {
- res = table[i].str;
- break;
- }
- }
-
- return res;
- }
-
-
- //------------------------------------------------------------------------------
- template <typename T, typename K>
- static std::string format_assoc(const T& table, const K& key) {
- std::string str = find_value_in_table(table, key);
- if (str == "?") {
- std::ostringstream oss;
- oss << str << " (0x" << std::hex << key << ")";
- str = oss.str();
- }
-
- return str;
- }
-
-
- //------------------------------------------------------------------------------
- template <typename T>
- static std::string format_assoc(const T& table, const char key) {
- return format_assoc(table, (const int)key);
- }
-
-
- //------------------------------------------------------------------------------
- static std::string section_flags(Elf_Xword flags) {
- std::string ret = "";
- if (flags & SHF_WRITE) {
- ret += "W";
- }
- if (flags & SHF_ALLOC) {
- ret += "A";
- }
- if (flags & SHF_EXECINSTR) {
- ret += "X";
- }
-
- return ret;
- }
-
-
-//------------------------------------------------------------------------------
-#define STR_FUNC_TABLE(name) \
- template <typename T> \
- static std::string str_##name(const T key) { \
- return format_assoc(name##_table, key); \
- }
-
- STR_FUNC_TABLE(class)
- STR_FUNC_TABLE(endian)
- STR_FUNC_TABLE(version)
- STR_FUNC_TABLE(type)
- STR_FUNC_TABLE(machine)
- STR_FUNC_TABLE(section_type)
- STR_FUNC_TABLE(segment_type)
- STR_FUNC_TABLE(segment_flag)
- STR_FUNC_TABLE(symbol_bind)
- STR_FUNC_TABLE(symbol_type)
- STR_FUNC_TABLE(dynamic_tag)
-
-#undef STR_FUNC_TABLE
-#undef DUMP_DEC_FORMAT
-#undef DUMP_HEX_FORMAT
-#undef DUMP_STR_FORMAT
-}; // class dump
-
-
-}; // namespace ELFIO
-
-#endif // ELFIO_DUMP_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dynamic.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dynamic.hpp
deleted file mode 100644
index 53a6e28..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dynamic.hpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_DYNAMIC_HPP
-#define ELFIO_DYNAMIC_HPP
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class dynamic_section_accessor {
- public:
- //------------------------------------------------------------------------------
- dynamic_section_accessor(const elfio& elf_file_, section* section_)
- : elf_file(elf_file_), dynamic_section(section_) {}
-
- //------------------------------------------------------------------------------
- Elf_Xword get_entries_num() const {
- Elf_Xword nRet = 0;
-
- if (0 != dynamic_section->get_entry_size()) {
- nRet = dynamic_section->get_size() / dynamic_section->get_entry_size();
- }
-
- return nRet;
- }
-
- //------------------------------------------------------------------------------
- bool get_entry(Elf_Xword index, Elf_Xword& tag, Elf_Xword& value, std::string& str) const {
- if (index >= get_entries_num()) { // Is index valid
- return false;
- }
-
- if (elf_file.get_class() == ELFCLASS32) {
- generic_get_entry_dyn<Elf32_Dyn>(index, tag, value);
- } else {
- generic_get_entry_dyn<Elf64_Dyn>(index, tag, value);
- }
-
- // If the tag may have a string table reference, prepare the string
- if (tag == DT_NEEDED || tag == DT_SONAME || tag == DT_RPATH || tag == DT_RUNPATH) {
- string_section_accessor strsec = elf_file.sections[get_string_table_index()];
- const char* result = strsec.get_string(value);
- if (0 == result) {
- str.clear();
- return false;
- }
- str = result;
- } else {
- str.clear();
- }
-
- return true;
- }
-
- //------------------------------------------------------------------------------
- void add_entry(Elf_Xword& tag, Elf_Xword& value) {
- if (elf_file.get_class() == ELFCLASS32) {
- generic_add_entry<Elf32_Dyn>(tag, value);
- } else {
- generic_add_entry<Elf64_Dyn>(tag, value);
- }
- }
-
- //------------------------------------------------------------------------------
- void add_entry(Elf_Xword& tag, std::string& str) {
- string_section_accessor strsec = elf_file.sections[get_string_table_index()];
- Elf_Xword value = strsec.add_string(str);
- add_entry(tag, value);
- }
-
- //------------------------------------------------------------------------------
- private:
- //------------------------------------------------------------------------------
- Elf_Half get_string_table_index() const { return (Elf_Half)dynamic_section->get_link(); }
-
- //------------------------------------------------------------------------------
- template <class T>
- void generic_get_entry_dyn(Elf_Xword index, Elf_Xword& tag, Elf_Xword& value) const {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- // Check unusual case when dynamic section has no data
- if (dynamic_section->get_data() == 0 ||
- (index + 1) * dynamic_section->get_entry_size() > dynamic_section->get_size()) {
- tag = DT_NULL;
- value = 0;
- return;
- }
-
- const T* pEntry = reinterpret_cast<const T*>(dynamic_section->get_data() +
- index * dynamic_section->get_entry_size());
- tag = convertor(pEntry->d_tag);
- switch (tag) {
- case DT_NULL:
- case DT_SYMBOLIC:
- case DT_TEXTREL:
- case DT_BIND_NOW:
- value = 0;
- break;
- case DT_NEEDED:
- case DT_PLTRELSZ:
- case DT_RELASZ:
- case DT_RELAENT:
- case DT_STRSZ:
- case DT_SYMENT:
- case DT_SONAME:
- case DT_RPATH:
- case DT_RELSZ:
- case DT_RELENT:
- case DT_PLTREL:
- case DT_INIT_ARRAYSZ:
- case DT_FINI_ARRAYSZ:
- case DT_RUNPATH:
- case DT_FLAGS:
- case DT_PREINIT_ARRAYSZ:
- value = convertor(pEntry->d_un.d_val);
- break;
- case DT_PLTGOT:
- case DT_HASH:
- case DT_STRTAB:
- case DT_SYMTAB:
- case DT_RELA:
- case DT_INIT:
- case DT_FINI:
- case DT_REL:
- case DT_DEBUG:
- case DT_JMPREL:
- case DT_INIT_ARRAY:
- case DT_FINI_ARRAY:
- case DT_PREINIT_ARRAY:
- default:
- value = convertor(pEntry->d_un.d_ptr);
- break;
- }
- }
-
- //------------------------------------------------------------------------------
- template <class T>
- void generic_add_entry(Elf_Xword tag, Elf_Xword value) {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- T entry;
-
- switch (tag) {
- case DT_NULL:
- case DT_SYMBOLIC:
- case DT_TEXTREL:
- case DT_BIND_NOW:
- value = 0;
- case DT_NEEDED:
- case DT_PLTRELSZ:
- case DT_RELASZ:
- case DT_RELAENT:
- case DT_STRSZ:
- case DT_SYMENT:
- case DT_SONAME:
- case DT_RPATH:
- case DT_RELSZ:
- case DT_RELENT:
- case DT_PLTREL:
- case DT_INIT_ARRAYSZ:
- case DT_FINI_ARRAYSZ:
- case DT_RUNPATH:
- case DT_FLAGS:
- case DT_PREINIT_ARRAYSZ:
- entry.d_un.d_val = convertor(value);
- break;
- case DT_PLTGOT:
- case DT_HASH:
- case DT_STRTAB:
- case DT_SYMTAB:
- case DT_RELA:
- case DT_INIT:
- case DT_FINI:
- case DT_REL:
- case DT_DEBUG:
- case DT_JMPREL:
- case DT_INIT_ARRAY:
- case DT_FINI_ARRAY:
- case DT_PREINIT_ARRAY:
- default:
- entry.d_un.d_ptr = convertor(value);
- break;
- }
-
- entry.d_tag = convertor(tag);
-
- dynamic_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
- }
-
- //------------------------------------------------------------------------------
- private:
- const elfio& elf_file;
- section* dynamic_section;
-};
-
-} // namespace ELFIO
-
-#endif // ELFIO_DYNAMIC_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_header.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_header.hpp
deleted file mode 100644
index b95f0a9..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_header.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELF_HEADER_HPP
-#define ELF_HEADER_HPP
-
-#include <iostream>
-
-namespace ELFIO {
-
-class elf_header {
- public:
- virtual ~elf_header(){};
- virtual bool load(std::istream& stream) = 0;
- virtual bool save(std::ostream& stream) const = 0;
-
- // ELF header functions
- ELFIO_GET_ACCESS_DECL(unsigned char, class);
- ELFIO_GET_ACCESS_DECL(unsigned char, elf_version);
- ELFIO_GET_ACCESS_DECL(unsigned char, encoding);
- ELFIO_GET_ACCESS_DECL(Elf_Word, version);
- ELFIO_GET_ACCESS_DECL(Elf_Half, header_size);
- ELFIO_GET_ACCESS_DECL(Elf_Half, section_entry_size);
- ELFIO_GET_ACCESS_DECL(Elf_Half, segment_entry_size);
-
- ELFIO_GET_SET_ACCESS_DECL(unsigned char, os_abi);
- ELFIO_GET_SET_ACCESS_DECL(unsigned char, abi_version);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Half, type);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Half, machine);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Word, flags);
- ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, entry);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Half, sections_num);
- ELFIO_GET_SET_ACCESS_DECL(Elf64_Off, sections_offset);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Half, segments_num);
- ELFIO_GET_SET_ACCESS_DECL(Elf64_Off, segments_offset);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Half, section_name_str_index);
-};
-
-
-template <class T>
-struct elf_header_impl_types;
-template <>
-struct elf_header_impl_types<Elf32_Ehdr> {
- typedef Elf32_Phdr Phdr_type;
- typedef Elf32_Shdr Shdr_type;
- static const unsigned char file_class = ELFCLASS32;
-};
-template <>
-struct elf_header_impl_types<Elf64_Ehdr> {
- typedef Elf64_Phdr Phdr_type;
- typedef Elf64_Shdr Shdr_type;
- static const unsigned char file_class = ELFCLASS64;
-};
-
-template <class T>
-class elf_header_impl : public elf_header {
- public:
- elf_header_impl(endianess_convertor* convertor_, unsigned char encoding) {
- convertor = convertor_;
-
- std::fill_n(reinterpret_cast<char*>(&header), sizeof(header), '\0');
-
- header.e_ident[EI_MAG0] = ELFMAG0;
- header.e_ident[EI_MAG1] = ELFMAG1;
- header.e_ident[EI_MAG2] = ELFMAG2;
- header.e_ident[EI_MAG3] = ELFMAG3;
- header.e_ident[EI_CLASS] = elf_header_impl_types<T>::file_class;
- header.e_ident[EI_DATA] = encoding;
- header.e_ident[EI_VERSION] = EV_CURRENT;
- header.e_version = EV_CURRENT;
- header.e_version = (*convertor)(header.e_version);
- header.e_ehsize = (sizeof(header));
- header.e_ehsize = (*convertor)(header.e_ehsize);
- header.e_shstrndx = (*convertor)((Elf_Half)1);
- header.e_phentsize = sizeof(typename elf_header_impl_types<T>::Phdr_type);
- header.e_shentsize = sizeof(typename elf_header_impl_types<T>::Shdr_type);
- header.e_phentsize = (*convertor)(header.e_phentsize);
- header.e_shentsize = (*convertor)(header.e_shentsize);
- }
-
- bool load(std::istream& stream) {
- stream.seekg(0);
- stream.read(reinterpret_cast<char*>(&header), sizeof(header));
-
- return (stream.gcount() == sizeof(header));
- }
-
- bool save(std::ostream& stream) const {
- stream.seekp(0);
- stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
-
- return stream.good();
- }
-
- // ELF header functions
- ELFIO_GET_ACCESS(unsigned char, class, header.e_ident[EI_CLASS]);
- ELFIO_GET_ACCESS(unsigned char, elf_version, header.e_ident[EI_VERSION]);
- ELFIO_GET_ACCESS(unsigned char, encoding, header.e_ident[EI_DATA]);
- ELFIO_GET_ACCESS(Elf_Word, version, header.e_version);
- ELFIO_GET_ACCESS(Elf_Half, header_size, header.e_ehsize);
- ELFIO_GET_ACCESS(Elf_Half, section_entry_size, header.e_shentsize);
- ELFIO_GET_ACCESS(Elf_Half, segment_entry_size, header.e_phentsize);
-
- ELFIO_GET_SET_ACCESS(unsigned char, os_abi, header.e_ident[EI_OSABI]);
- ELFIO_GET_SET_ACCESS(unsigned char, abi_version, header.e_ident[EI_ABIVERSION]);
- ELFIO_GET_SET_ACCESS(Elf_Half, type, header.e_type);
- ELFIO_GET_SET_ACCESS(Elf_Half, machine, header.e_machine);
- ELFIO_GET_SET_ACCESS(Elf_Word, flags, header.e_flags);
- ELFIO_GET_SET_ACCESS(Elf_Half, section_name_str_index, header.e_shstrndx);
- ELFIO_GET_SET_ACCESS(Elf64_Addr, entry, header.e_entry);
- ELFIO_GET_SET_ACCESS(Elf_Half, sections_num, header.e_shnum);
- ELFIO_GET_SET_ACCESS(Elf64_Off, sections_offset, header.e_shoff);
- ELFIO_GET_SET_ACCESS(Elf_Half, segments_num, header.e_phnum);
- ELFIO_GET_SET_ACCESS(Elf64_Off, segments_offset, header.e_phoff);
-
- private:
- T header;
- endianess_convertor* convertor;
-};
-
-} // namespace ELFIO
-
-#endif // ELF_HEADER_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_note.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_note.hpp
deleted file mode 100644
index e350c85..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_note.hpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_NOTE_HPP
-#define ELFIO_NOTE_HPP
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-// There are discrepancies in documentations. SCO documentation
-// (http://www.sco.com/developers/gabi/latest/ch5.pheader.html#note_section)
-// requires 8 byte entries alignment for 64-bit ELF file,
-// but Oracle's definition uses the same structure
-// for 32-bit and 64-bit formats.
-// (https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-18048.html)
-//
-// It looks like EM_X86_64 Linux implementation is similar to Oracle's
-// definition. Therefore, the same alignment works for both formats
-//------------------------------------------------------------------------------
-
-//------------------------------------------------------------------------------
-class note_section_accessor {
- public:
- //------------------------------------------------------------------------------
- note_section_accessor(const elfio& elf_file_, section* section_)
- : elf_file(elf_file_), note_section(section_) {
- process_section();
- }
-
- //------------------------------------------------------------------------------
- Elf_Word get_notes_num() const { return (Elf_Word)note_start_positions.size(); }
-
- //------------------------------------------------------------------------------
- bool get_note(Elf_Word index, Elf_Word& type, std::string& name, void*& desc,
- Elf_Word& descSize) const {
- if (index >= note_section->get_size()) {
- return false;
- }
-
- const char* pData = note_section->get_data() + note_start_positions[index];
- int align = sizeof(Elf_Word);
-
- const endianess_convertor& convertor = elf_file.get_convertor();
- type = convertor(*(Elf_Word*)(pData + 2 * align));
- Elf_Word namesz = convertor(*(Elf_Word*)(pData));
- descSize = convertor(*(Elf_Word*)(pData + sizeof(namesz)));
- Elf_Word max_name_size = note_section->get_size() - note_start_positions[index];
- if (namesz > max_name_size || namesz + descSize > max_name_size) {
- return false;
- }
- name.assign(pData + 3 * align, namesz - 1);
- if (0 == descSize) {
- desc = 0;
- } else {
- desc = const_cast<char*>(pData + 3 * align + ((namesz + align - 1) / align) * align);
- }
-
- return true;
- }
-
- //------------------------------------------------------------------------------
- void add_note(Elf_Word type, const std::string& name, const void* desc, Elf_Word descSize) {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- int align = sizeof(Elf_Word);
- Elf_Word nameLen = (Elf_Word)name.size() + 1;
- Elf_Word nameLenConv = convertor(nameLen);
- std::string buffer(reinterpret_cast<char*>(&nameLenConv), align);
- Elf_Word descSizeConv = convertor(descSize);
- buffer.append(reinterpret_cast<char*>(&descSizeConv), align);
- type = convertor(type);
- buffer.append(reinterpret_cast<char*>(&type), align);
- buffer.append(name);
- buffer.append(1, '\x00');
- const char pad[] = {'\0', '\0', '\0', '\0'};
- if (nameLen % align != 0) {
- buffer.append(pad, align - nameLen % align);
- }
- if (desc != 0 && descSize != 0) {
- buffer.append(reinterpret_cast<const char*>(desc), descSize);
- if (descSize % align != 0) {
- buffer.append(pad, align - descSize % align);
- }
- }
-
- note_start_positions.push_back(note_section->get_size());
- note_section->append_data(buffer);
- }
-
- private:
- //------------------------------------------------------------------------------
- void process_section() {
- const endianess_convertor& convertor = elf_file.get_convertor();
- const char* data = note_section->get_data();
- Elf_Xword size = note_section->get_size();
- Elf_Xword current = 0;
-
- note_start_positions.clear();
-
- // Is it empty?
- if (0 == data || 0 == size) {
- return;
- }
-
- int align = sizeof(Elf_Word);
- while (current + 3 * align <= size) {
- note_start_positions.push_back(current);
- Elf_Word namesz = convertor(*(Elf_Word*)(data + current));
- Elf_Word descsz = convertor(*(Elf_Word*)(data + current + sizeof(namesz)));
-
- current += 3 * sizeof(Elf_Word) + ((namesz + align - 1) / align) * align +
- ((descsz + align - 1) / align) * align;
- }
- }
-
- //------------------------------------------------------------------------------
- private:
- const elfio& elf_file;
- section* note_section;
- std::vector<Elf_Xword> note_start_positions;
-};
-
-} // namespace ELFIO
-
-#endif // ELFIO_NOTE_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_relocation.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_relocation.hpp
deleted file mode 100644
index 270c911..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_relocation.hpp
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_RELOCATION_HPP
-#define ELFIO_RELOCATION_HPP
-
-namespace ELFIO {
-
-template <typename T>
-struct get_sym_and_type;
-template <>
-struct get_sym_and_type<Elf32_Rel> {
- static int get_r_sym(Elf_Xword info) { return ELF32_R_SYM((Elf_Word)info); }
- static int get_r_type(Elf_Xword info) { return ELF32_R_TYPE((Elf_Word)info); }
-};
-template <>
-struct get_sym_and_type<Elf32_Rela> {
- static int get_r_sym(Elf_Xword info) { return ELF32_R_SYM((Elf_Word)info); }
- static int get_r_type(Elf_Xword info) { return ELF32_R_TYPE((Elf_Word)info); }
-};
-template <>
-struct get_sym_and_type<Elf64_Rel> {
- static int get_r_sym(Elf_Xword info) { return ELF64_R_SYM(info); }
- static int get_r_type(Elf_Xword info) { return ELF64_R_TYPE(info); }
-};
-template <>
-struct get_sym_and_type<Elf64_Rela> {
- static int get_r_sym(Elf_Xword info) { return ELF64_R_SYM(info); }
- static int get_r_type(Elf_Xword info) { return ELF64_R_TYPE(info); }
-};
-
-
-//------------------------------------------------------------------------------
-class relocation_section_accessor {
- public:
- //------------------------------------------------------------------------------
- relocation_section_accessor(const elfio& elf_file_, section* section_)
- : elf_file(elf_file_), relocation_section(section_) {}
-
- //------------------------------------------------------------------------------
- Elf_Xword get_entries_num() const {
- Elf_Xword nRet = 0;
-
- if (0 != relocation_section->get_entry_size()) {
- nRet = relocation_section->get_size() / relocation_section->get_entry_size();
- }
-
- return nRet;
- }
-
- //------------------------------------------------------------------------------
- bool get_entry(Elf_Xword index, Elf64_Addr& offset, Elf_Word& symbol, Elf_Word& type,
- Elf_Sxword& addend) const {
- if (index >= get_entries_num()) { // Is index valid
- return false;
- }
-
- if (elf_file.get_class() == ELFCLASS32) {
- if (SHT_REL == relocation_section->get_type()) {
- generic_get_entry_rel<Elf32_Rel>(index, offset, symbol, type, addend);
- } else if (SHT_RELA == relocation_section->get_type()) {
- generic_get_entry_rela<Elf32_Rela>(index, offset, symbol, type, addend);
- }
- } else {
- if (SHT_REL == relocation_section->get_type()) {
- generic_get_entry_rel<Elf64_Rel>(index, offset, symbol, type, addend);
- } else if (SHT_RELA == relocation_section->get_type()) {
- generic_get_entry_rela<Elf64_Rela>(index, offset, symbol, type, addend);
- }
- }
-
- return true;
- }
-
- //------------------------------------------------------------------------------
- bool get_entry(Elf_Xword index, Elf64_Addr& offset, Elf64_Addr& symbolValue,
- std::string& symbolName, Elf_Word& type, Elf_Sxword& addend,
- Elf_Sxword& calcValue) const {
- // Do regular job
- Elf_Word symbol;
- bool ret = get_entry(index, offset, symbol, type, addend);
-
- // Find the symbol
- Elf_Xword size;
- unsigned char bind;
- unsigned char symbolType;
- Elf_Half section;
- unsigned char other;
-
- symbol_section_accessor symbols(elf_file, elf_file.sections[get_symbol_table_index()]);
- ret = ret && symbols.get_symbol(symbol, symbolName, symbolValue, size, bind, symbolType,
- section, other);
-
- if (ret) { // Was it successful?
- switch (type) {
- case R_386_NONE: // none
- calcValue = 0;
- break;
- case R_386_32: // S + A
- calcValue = symbolValue + addend;
- break;
- case R_386_PC32: // S + A - P
- calcValue = symbolValue + addend - offset;
- break;
- case R_386_GOT32: // G + A - P
- calcValue = 0;
- break;
- case R_386_PLT32: // L + A - P
- calcValue = 0;
- break;
- case R_386_COPY: // none
- calcValue = 0;
- break;
- case R_386_GLOB_DAT: // S
- case R_386_JMP_SLOT: // S
- calcValue = symbolValue;
- break;
- case R_386_RELATIVE: // B + A
- calcValue = addend;
- break;
- case R_386_GOTOFF: // S + A - GOT
- calcValue = 0;
- break;
- case R_386_GOTPC: // GOT + A - P
- calcValue = 0;
- break;
- default: // Not recognized symbol!
- calcValue = 0;
- break;
- }
- }
-
- return ret;
- }
-
- //------------------------------------------------------------------------------
- void add_entry(Elf64_Addr offset, Elf_Xword info) {
- if (elf_file.get_class() == ELFCLASS32) {
- generic_add_entry<Elf32_Rel>(offset, info);
- } else {
- generic_add_entry<Elf64_Rel>(offset, info);
- }
- }
-
- //------------------------------------------------------------------------------
- void add_entry(Elf64_Addr offset, Elf_Word symbol, unsigned char type) {
- Elf_Xword info;
- if (elf_file.get_class() == ELFCLASS32) {
- info = ELF32_R_INFO((Elf_Xword)symbol, type);
- } else {
- info = ELF64_R_INFO((Elf_Xword)symbol, type);
- }
-
- add_entry(offset, info);
- }
-
- //------------------------------------------------------------------------------
- void add_entry(Elf64_Addr offset, Elf_Xword info, Elf_Sxword addend) {
- if (elf_file.get_class() == ELFCLASS32) {
- generic_add_entry<Elf32_Rela>(offset, info, addend);
- } else {
- generic_add_entry<Elf64_Rela>(offset, info, addend);
- }
- }
-
- //------------------------------------------------------------------------------
- void add_entry(Elf64_Addr offset, Elf_Word symbol, unsigned char type, Elf_Sxword addend) {
- Elf_Xword info;
- if (elf_file.get_class() == ELFCLASS32) {
- info = ELF32_R_INFO((Elf_Xword)symbol, type);
- } else {
- info = ELF64_R_INFO((Elf_Xword)symbol, type);
- }
-
- add_entry(offset, info, addend);
- }
-
- //------------------------------------------------------------------------------
- void add_entry(string_section_accessor str_writer, const char* str,
- symbol_section_accessor sym_writer, Elf64_Addr value, Elf_Word size,
- unsigned char sym_info, unsigned char other, Elf_Half shndx, Elf64_Addr offset,
- unsigned char type) {
- Elf_Word str_index = str_writer.add_string(str);
- Elf_Word sym_index = sym_writer.add_symbol(str_index, value, size, sym_info, other, shndx);
- add_entry(offset, sym_index, type);
- }
-
- //------------------------------------------------------------------------------
- private:
- //------------------------------------------------------------------------------
- Elf_Half get_symbol_table_index() const { return (Elf_Half)relocation_section->get_link(); }
-
- //------------------------------------------------------------------------------
- template <class T>
- void generic_get_entry_rel(Elf_Xword index, Elf64_Addr& offset, Elf_Word& symbol,
- Elf_Word& type, Elf_Sxword& addend) const {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- const T* pEntry = reinterpret_cast<const T*>(relocation_section->get_data() +
- index * relocation_section->get_entry_size());
- offset = convertor(pEntry->r_offset);
- Elf_Xword tmp = convertor(pEntry->r_info);
- symbol = get_sym_and_type<T>::get_r_sym(tmp);
- type = get_sym_and_type<T>::get_r_type(tmp);
- addend = 0;
- }
-
- //------------------------------------------------------------------------------
- template <class T>
- void generic_get_entry_rela(Elf_Xword index, Elf64_Addr& offset, Elf_Word& symbol,
- Elf_Word& type, Elf_Sxword& addend) const {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- const T* pEntry = reinterpret_cast<const T*>(relocation_section->get_data() +
- index * relocation_section->get_entry_size());
- offset = convertor(pEntry->r_offset);
- Elf_Xword tmp = convertor(pEntry->r_info);
- symbol = get_sym_and_type<T>::get_r_sym(tmp);
- type = get_sym_and_type<T>::get_r_type(tmp);
- addend = convertor(pEntry->r_addend);
- }
-
- //------------------------------------------------------------------------------
- template <class T>
- void generic_add_entry(Elf64_Addr offset, Elf_Xword info) {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- T entry;
- entry.r_offset = offset;
- entry.r_info = info;
- entry.r_offset = convertor(entry.r_offset);
- entry.r_info = convertor(entry.r_info);
-
- relocation_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
- }
-
- //------------------------------------------------------------------------------
- template <class T>
- void generic_add_entry(Elf64_Addr offset, Elf_Xword info, Elf_Sxword addend) {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- T entry;
- entry.r_offset = offset;
- entry.r_info = info;
- entry.r_addend = addend;
- entry.r_offset = convertor(entry.r_offset);
- entry.r_info = convertor(entry.r_info);
- entry.r_addend = convertor(entry.r_addend);
-
- relocation_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
- }
-
- //------------------------------------------------------------------------------
- private:
- const elfio& elf_file;
- section* relocation_section;
-};
-
-} // namespace ELFIO
-
-#endif // ELFIO_RELOCATION_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_section.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_section.hpp
deleted file mode 100644
index 6106fc7..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_section.hpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_SECTION_HPP
-#define ELFIO_SECTION_HPP
-
-#include <string>
-#include <iostream>
-
-namespace ELFIO {
-
-class section {
- friend class elfio;
-
- public:
- virtual ~section(){};
-
- ELFIO_GET_ACCESS_DECL(Elf_Half, index);
- ELFIO_GET_SET_ACCESS_DECL(std::string, name);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Word, type);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, flags);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Word, info);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Word, link);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, addr_align);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, entry_size);
- ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, address);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, size);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Word, name_string_offset);
-
- virtual const char* get_data() const = 0;
- virtual void set_data(const char* pData, Elf_Word size) = 0;
- virtual void set_data(const std::string& data) = 0;
- virtual void append_data(const char* pData, Elf_Word size) = 0;
- virtual void append_data(const std::string& data) = 0;
-
- protected:
- ELFIO_GET_SET_ACCESS_DECL(Elf64_Off, offset);
- ELFIO_SET_ACCESS_DECL(Elf_Half, index);
-
- virtual void load(std::istream& f, std::streampos header_offset) = 0;
- virtual void save(std::ostream& f, std::streampos header_offset,
- std::streampos data_offset) = 0;
- virtual bool is_address_initialized() const = 0;
-};
-
-
-template <class T>
-class section_impl : public section {
- public:
- //------------------------------------------------------------------------------
- section_impl(const endianess_convertor* convertor_) : convertor(convertor_) {
- std::fill_n(reinterpret_cast<char*>(&header), sizeof(header), '\0');
- is_address_set = false;
- data = 0;
- data_size = 0;
- }
-
- //------------------------------------------------------------------------------
- ~section_impl() { delete[] data; }
-
- //------------------------------------------------------------------------------
- // Section info functions
- ELFIO_GET_SET_ACCESS(Elf_Word, type, header.sh_type);
- ELFIO_GET_SET_ACCESS(Elf_Xword, flags, header.sh_flags);
- ELFIO_GET_SET_ACCESS(Elf_Xword, size, header.sh_size);
- ELFIO_GET_SET_ACCESS(Elf_Word, link, header.sh_link);
- ELFIO_GET_SET_ACCESS(Elf_Word, info, header.sh_info);
- ELFIO_GET_SET_ACCESS(Elf_Xword, addr_align, header.sh_addralign);
- ELFIO_GET_SET_ACCESS(Elf_Xword, entry_size, header.sh_entsize);
- ELFIO_GET_SET_ACCESS(Elf_Word, name_string_offset, header.sh_name);
- ELFIO_GET_ACCESS(Elf64_Addr, address, header.sh_addr);
-
- //------------------------------------------------------------------------------
- Elf_Half get_index() const { return index; }
-
-
- //------------------------------------------------------------------------------
- std::string get_name() const { return name; }
-
- //------------------------------------------------------------------------------
- void set_name(std::string name_) { name = name_; }
-
- //------------------------------------------------------------------------------
- void set_address(Elf64_Addr value) {
- header.sh_addr = value;
- header.sh_addr = (*convertor)(header.sh_addr);
- is_address_set = true;
- }
-
- //------------------------------------------------------------------------------
- bool is_address_initialized() const { return is_address_set; }
-
- //------------------------------------------------------------------------------
- const char* get_data() const { return data; }
-
- //------------------------------------------------------------------------------
- void set_data(const char* raw_data, Elf_Word size) {
- if (get_type() != SHT_NOBITS) {
- delete[] data;
- try {
- data = new char[size];
- } catch (const std::bad_alloc&) {
- data = 0;
- data_size = 0;
- size = 0;
- }
- if (0 != data && 0 != raw_data) {
- data_size = size;
- std::copy(raw_data, raw_data + size, data);
- }
- }
-
- set_size(size);
- }
-
- //------------------------------------------------------------------------------
- void set_data(const std::string& str_data) {
- return set_data(str_data.c_str(), (Elf_Word)str_data.size());
- }
-
- //------------------------------------------------------------------------------
- void append_data(const char* raw_data, Elf_Word size) {
- if (get_type() != SHT_NOBITS) {
- if (get_size() + size < data_size) {
- std::copy(raw_data, raw_data + size, data + get_size());
- } else {
- data_size = 2 * (data_size + size);
- char* new_data;
- try {
- new_data = new char[data_size];
- } catch (const std::bad_alloc&) {
- new_data = 0;
- size = 0;
- }
- if (0 != new_data) {
- std::copy(data, data + get_size(), new_data);
- std::copy(raw_data, raw_data + size, new_data + get_size());
- delete[] data;
- data = new_data;
- }
- }
- set_size(get_size() + size);
- }
- }
-
- //------------------------------------------------------------------------------
- void append_data(const std::string& str_data) {
- return append_data(str_data.c_str(), (Elf_Word)str_data.size());
- }
-
- //------------------------------------------------------------------------------
- protected:
- //------------------------------------------------------------------------------
- ELFIO_GET_SET_ACCESS(Elf64_Off, offset, header.sh_offset);
-
- //------------------------------------------------------------------------------
- void set_index(Elf_Half value) { index = value; }
-
- //------------------------------------------------------------------------------
- void load(std::istream& stream, std::streampos header_offset) {
- std::fill_n(reinterpret_cast<char*>(&header), sizeof(header), '\0');
- stream.seekg(header_offset);
- stream.read(reinterpret_cast<char*>(&header), sizeof(header));
-
- Elf_Xword size = get_size();
- if (0 == data && SHT_NULL != get_type() && SHT_NOBITS != get_type()) {
- try {
- data = new char[size];
- } catch (const std::bad_alloc&) {
- data = 0;
- data_size = 0;
- }
- if (0 != size) {
- stream.seekg((*convertor)(header.sh_offset));
- stream.read(data, size);
- data_size = size;
- }
- }
- }
-
- //------------------------------------------------------------------------------
- void save(std::ostream& f, std::streampos header_offset, std::streampos data_offset) {
- if (0 != get_index()) {
- header.sh_offset = data_offset;
- header.sh_offset = (*convertor)(header.sh_offset);
- }
-
- save_header(f, header_offset);
- if (get_type() != SHT_NOBITS && get_type() != SHT_NULL && get_size() != 0 && data != 0) {
- save_data(f, data_offset);
- }
- }
-
- //------------------------------------------------------------------------------
- private:
- //------------------------------------------------------------------------------
- void save_header(std::ostream& f, std::streampos header_offset) const {
- f.seekp(header_offset);
- f.write(reinterpret_cast<const char*>(&header), sizeof(header));
- }
-
- //------------------------------------------------------------------------------
- void save_data(std::ostream& f, std::streampos data_offset) const {
- f.seekp(data_offset);
- f.write(get_data(), get_size());
- }
-
- //------------------------------------------------------------------------------
- private:
- T header;
- Elf_Half index;
- std::string name;
- char* data;
- Elf_Word data_size;
- const endianess_convertor* convertor;
- bool is_address_set;
-};
-
-} // namespace ELFIO
-
-#endif // ELFIO_SECTION_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_segment.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_segment.hpp
deleted file mode 100644
index 59e37ec..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_segment.hpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_SEGMENT_HPP
-#define ELFIO_SEGMENT_HPP
-
-#include <iostream>
-#include <vector>
-
-namespace ELFIO {
-
-class segment {
- friend class elfio;
-
- public:
- virtual ~segment(){};
-
- ELFIO_GET_ACCESS_DECL(Elf_Half, index);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Word, type);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Word, flags);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, align);
- ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, virtual_address);
- ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, physical_address);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, file_size);
- ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, memory_size);
- ELFIO_GET_ACCESS_DECL(Elf64_Off, offset);
-
- virtual const char* get_data() const = 0;
-
- virtual Elf_Half add_section_index(Elf_Half index, Elf_Xword addr_align) = 0;
- virtual Elf_Half get_sections_num() const = 0;
- virtual Elf_Half get_section_index_at(Elf_Half num) const = 0;
- virtual bool is_offset_initialized() const = 0;
-
- protected:
- ELFIO_SET_ACCESS_DECL(Elf64_Off, offset);
- ELFIO_SET_ACCESS_DECL(Elf_Half, index);
-
- virtual const std::vector<Elf_Half>& get_sections() const = 0;
- virtual void load(std::istream& stream, std::streampos header_offset) = 0;
- virtual void save(std::ostream& f, std::streampos header_offset,
- std::streampos data_offset) = 0;
-};
-
-
-//------------------------------------------------------------------------------
-template <class T>
-class segment_impl : public segment {
- public:
- //------------------------------------------------------------------------------
- segment_impl(endianess_convertor* convertor_) : convertor(convertor_) {
- is_offset_set = false;
- std::fill_n(reinterpret_cast<char*>(&ph), sizeof(ph), '\0');
- data = 0;
- }
-
- //------------------------------------------------------------------------------
- virtual ~segment_impl() { delete[] data; }
-
- //------------------------------------------------------------------------------
- // Section info functions
- ELFIO_GET_SET_ACCESS(Elf_Word, type, ph.p_type);
- ELFIO_GET_SET_ACCESS(Elf_Word, flags, ph.p_flags);
- ELFIO_GET_SET_ACCESS(Elf_Xword, align, ph.p_align);
- ELFIO_GET_SET_ACCESS(Elf64_Addr, virtual_address, ph.p_vaddr);
- ELFIO_GET_SET_ACCESS(Elf64_Addr, physical_address, ph.p_paddr);
- ELFIO_GET_SET_ACCESS(Elf_Xword, file_size, ph.p_filesz);
- ELFIO_GET_SET_ACCESS(Elf_Xword, memory_size, ph.p_memsz);
- ELFIO_GET_ACCESS(Elf64_Off, offset, ph.p_offset);
-
- //------------------------------------------------------------------------------
- Elf_Half get_index() const { return index; }
-
- //------------------------------------------------------------------------------
- const char* get_data() const { return data; }
-
- //------------------------------------------------------------------------------
- Elf_Half add_section_index(Elf_Half sec_index, Elf_Xword addr_align) {
- sections.push_back(sec_index);
- if (addr_align > get_align()) {
- set_align(addr_align);
- }
-
- return (Elf_Half)sections.size();
- }
-
- //------------------------------------------------------------------------------
- Elf_Half get_sections_num() const { return (Elf_Half)sections.size(); }
-
- //------------------------------------------------------------------------------
- Elf_Half get_section_index_at(Elf_Half num) const {
- if (num < sections.size()) {
- return sections[num];
- }
-
- return -1;
- }
-
- //------------------------------------------------------------------------------
- protected:
- //------------------------------------------------------------------------------
-
- //------------------------------------------------------------------------------
- void set_offset(Elf64_Off value) {
- ph.p_offset = value;
- ph.p_offset = (*convertor)(ph.p_offset);
- is_offset_set = true;
- }
-
- //------------------------------------------------------------------------------
- bool is_offset_initialized() const { return is_offset_set; }
-
- //------------------------------------------------------------------------------
- const std::vector<Elf_Half>& get_sections() const { return sections; }
-
- //------------------------------------------------------------------------------
- void set_index(Elf_Half value) { index = value; }
-
- //------------------------------------------------------------------------------
- void load(std::istream& stream, std::streampos header_offset) {
- stream.seekg(header_offset);
- stream.read(reinterpret_cast<char*>(&ph), sizeof(ph));
- is_offset_set = true;
-
- if (PT_NULL != get_type() && 0 != get_file_size()) {
- stream.seekg((*convertor)(ph.p_offset));
- Elf_Xword size = get_file_size();
- try {
- data = new char[size];
- } catch (const std::bad_alloc&) {
- data = 0;
- }
- if (0 != data) {
- stream.read(data, size);
- }
- }
- }
-
- //------------------------------------------------------------------------------
- void save(std::ostream& f, std::streampos header_offset, std::streampos data_offset) {
- ph.p_offset = data_offset;
- ph.p_offset = (*convertor)(ph.p_offset);
- f.seekp(header_offset);
- f.write(reinterpret_cast<const char*>(&ph), sizeof(ph));
- }
-
- //------------------------------------------------------------------------------
- private:
- T ph;
- Elf_Half index;
- char* data;
- std::vector<Elf_Half> sections;
- endianess_convertor* convertor;
- bool is_offset_set;
-};
-
-} // namespace ELFIO
-
-#endif // ELFIO_SEGMENT_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_strings.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_strings.hpp
deleted file mode 100644
index 07adc3a..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_strings.hpp
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_STRINGS_HPP
-#define ELFIO_STRINGS_HPP
-
-#include <cstdlib>
-#include <cstring>
-#include <string>
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class string_section_accessor {
- public:
- //------------------------------------------------------------------------------
- string_section_accessor(section* section_) : string_section(section_) {}
-
-
- //------------------------------------------------------------------------------
- const char* get_string(Elf_Word index) const {
- if (string_section) {
- if (index < string_section->get_size()) {
- const char* data = string_section->get_data();
- if (0 != data) {
- return data + index;
- }
- }
- }
-
- return 0;
- }
-
-
- //------------------------------------------------------------------------------
- Elf_Word add_string(const char* str) {
- Elf_Word current_position = 0;
-
- if (string_section) {
- // Strings are addeded to the end of the current section data
- current_position = (Elf_Word)string_section->get_size();
-
- if (current_position == 0) {
- char empty_string = '\0';
- string_section->append_data(&empty_string, 1);
- current_position++;
- }
- string_section->append_data(str, (Elf_Word)std::strlen(str) + 1);
- }
-
- return current_position;
- }
-
-
- //------------------------------------------------------------------------------
- Elf_Word add_string(const std::string& str) { return add_string(str.c_str()); }
-
- //------------------------------------------------------------------------------
- private:
- section* string_section;
-};
-
-} // namespace ELFIO
-
-#endif // ELFIO_STRINGS_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_symbols.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_symbols.hpp
deleted file mode 100644
index 8184bcd..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_symbols.hpp
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_SYMBOLS_HPP
-#define ELFIO_SYMBOLS_HPP
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class symbol_section_accessor {
- public:
- //------------------------------------------------------------------------------
- symbol_section_accessor(const elfio& elf_file_, section* symbol_section_)
- : elf_file(elf_file_), symbol_section(symbol_section_) {
- find_hash_section();
- }
-
- //------------------------------------------------------------------------------
- Elf_Xword get_symbols_num() const {
- Elf_Xword nRet = 0;
- if (0 != symbol_section->get_entry_size()) {
- nRet = symbol_section->get_size() / symbol_section->get_entry_size();
- }
-
- return nRet;
- }
-
- //------------------------------------------------------------------------------
- bool get_symbol(Elf_Xword index, std::string& name, Elf64_Addr& value, Elf_Xword& size,
- unsigned char& bind, unsigned char& type, Elf_Half& section_index,
- unsigned char& other) const {
- bool ret = false;
-
- if (elf_file.get_class() == ELFCLASS32) {
- ret = generic_get_symbol<Elf32_Sym>(index, name, value, size, bind, type, section_index,
- other);
- } else {
- ret = generic_get_symbol<Elf64_Sym>(index, name, value, size, bind, type, section_index,
- other);
- }
-
- return ret;
- }
-
- //------------------------------------------------------------------------------
- bool get_symbol(const std::string& name, Elf64_Addr& value, Elf_Xword& size,
- unsigned char& bind, unsigned char& type, Elf_Half& section_index,
- unsigned char& other) const {
- bool ret = false;
-
- if (0 != get_hash_table_index()) {
- Elf_Word nbucket = *(Elf_Word*)hash_section->get_data();
- Elf_Word nchain = *(Elf_Word*)(hash_section->get_data() + sizeof(Elf_Word));
- Elf_Word val = elf_hash((const unsigned char*)name.c_str());
-
- Elf_Word y =
- *(Elf_Word*)(hash_section->get_data() + (2 + val % nbucket) * sizeof(Elf_Word));
- std::string str;
- get_symbol(y, str, value, size, bind, type, section_index, other);
- while (str != name && STN_UNDEF != y && y < nchain) {
- y = *(Elf_Word*)(hash_section->get_data() + (2 + nbucket + y) * sizeof(Elf_Word));
- get_symbol(y, str, value, size, bind, type, section_index, other);
- }
- if (str == name) {
- ret = true;
- }
- }
-
- return ret;
- }
-
- //------------------------------------------------------------------------------
- Elf_Word add_symbol(Elf_Word name, Elf64_Addr value, Elf_Xword size, unsigned char info,
- unsigned char other, Elf_Half shndx) {
- Elf_Word nRet;
-
- if (symbol_section->get_size() == 0) {
- if (elf_file.get_class() == ELFCLASS32) {
- nRet = generic_add_symbol<Elf32_Sym>(0, 0, 0, 0, 0, 0);
- } else {
- nRet = generic_add_symbol<Elf64_Sym>(0, 0, 0, 0, 0, 0);
- }
- }
-
- if (elf_file.get_class() == ELFCLASS32) {
- nRet = generic_add_symbol<Elf32_Sym>(name, value, size, info, other, shndx);
- } else {
- nRet = generic_add_symbol<Elf64_Sym>(name, value, size, info, other, shndx);
- }
-
- return nRet;
- }
-
- //------------------------------------------------------------------------------
- Elf_Word add_symbol(Elf_Word name, Elf64_Addr value, Elf_Xword size, unsigned char bind,
- unsigned char type, unsigned char other, Elf_Half shndx) {
- return add_symbol(name, value, size, ELF_ST_INFO(bind, type), other, shndx);
- }
-
- //------------------------------------------------------------------------------
- Elf_Word add_symbol(string_section_accessor& pStrWriter, const char* str, Elf64_Addr value,
- Elf_Xword size, unsigned char info, unsigned char other, Elf_Half shndx) {
- Elf_Word index = pStrWriter.add_string(str);
- return add_symbol(index, value, size, info, other, shndx);
- }
-
- //------------------------------------------------------------------------------
- Elf_Word add_symbol(string_section_accessor& pStrWriter, const char* str, Elf64_Addr value,
- Elf_Xword size, unsigned char bind, unsigned char type, unsigned char other,
- Elf_Half shndx) {
- return add_symbol(pStrWriter, str, value, size, ELF_ST_INFO(bind, type), other, shndx);
- }
-
- //------------------------------------------------------------------------------
- private:
- //------------------------------------------------------------------------------
- void find_hash_section() {
- hash_section = 0;
- hash_section_index = 0;
- Elf_Half nSecNo = elf_file.sections.size();
- for (Elf_Half i = 0; i < nSecNo && 0 == hash_section_index; ++i) {
- const section* sec = elf_file.sections[i];
- if (sec->get_link() == symbol_section->get_index()) {
- hash_section = sec;
- hash_section_index = i;
- }
- }
- }
-
- //------------------------------------------------------------------------------
- Elf_Half get_string_table_index() const { return (Elf_Half)symbol_section->get_link(); }
-
- //------------------------------------------------------------------------------
- Elf_Half get_hash_table_index() const { return hash_section_index; }
-
- //------------------------------------------------------------------------------
- template <class T>
- bool generic_get_symbol(Elf_Xword index, std::string& name, Elf64_Addr& value, Elf_Xword& size,
- unsigned char& bind, unsigned char& type, Elf_Half& section_index,
- unsigned char& other) const {
- bool ret = false;
-
- if (index < get_symbols_num()) {
- const T* pSym = reinterpret_cast<const T*>(symbol_section->get_data() +
- index * symbol_section->get_entry_size());
-
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- section* string_section = elf_file.sections[get_string_table_index()];
- string_section_accessor str_reader(string_section);
- const char* pStr = str_reader.get_string(convertor(pSym->st_name));
- if (0 != pStr) {
- name = pStr;
- }
- value = convertor(pSym->st_value);
- size = convertor(pSym->st_size);
- bind = ELF_ST_BIND(pSym->st_info);
- type = ELF_ST_TYPE(pSym->st_info);
- section_index = convertor(pSym->st_shndx);
- other = pSym->st_other;
-
- ret = true;
- }
-
- return ret;
- }
-
- //------------------------------------------------------------------------------
- template <class T>
- Elf_Word generic_add_symbol(Elf_Word name, Elf64_Addr value, Elf_Xword size, unsigned char info,
- unsigned char other, Elf_Half shndx) {
- const endianess_convertor& convertor = elf_file.get_convertor();
-
- T entry;
- entry.st_name = convertor(name);
- entry.st_value = value;
- entry.st_value = convertor(entry.st_value);
- entry.st_size = size;
- entry.st_size = convertor(entry.st_size);
- entry.st_info = convertor(info);
- entry.st_other = convertor(other);
- entry.st_shndx = convertor(shndx);
-
- symbol_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
-
- Elf_Word nRet = symbol_section->get_size() / sizeof(entry) - 1;
-
- return nRet;
- }
-
- //------------------------------------------------------------------------------
- private:
- const elfio& elf_file;
- section* symbol_section;
- Elf_Half hash_section_index;
- const section* hash_section;
-};
-
-} // namespace ELFIO
-
-#endif // ELFIO_SYMBOLS_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_utils.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_utils.hpp
deleted file mode 100644
index b1bb00e..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_utils.hpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_UTILS_HPP
-#define ELFIO_UTILS_HPP
-
-#define ELFIO_GET_ACCESS(TYPE, NAME, FIELD) \
- TYPE get_##NAME() const { return (*convertor)(FIELD); }
-#define ELFIO_SET_ACCESS(TYPE, NAME, FIELD) \
- void set_##NAME(TYPE value) { \
- FIELD = value; \
- FIELD = (*convertor)(FIELD); \
- }
-#define ELFIO_GET_SET_ACCESS(TYPE, NAME, FIELD) \
- TYPE get_##NAME() const { return (*convertor)(FIELD); } \
- void set_##NAME(TYPE value) { \
- FIELD = value; \
- FIELD = (*convertor)(FIELD); \
- }
-
-#define ELFIO_GET_ACCESS_DECL(TYPE, NAME) virtual TYPE get_##NAME() const = 0
-
-#define ELFIO_SET_ACCESS_DECL(TYPE, NAME) virtual void set_##NAME(TYPE value) = 0
-
-#define ELFIO_GET_SET_ACCESS_DECL(TYPE, NAME) \
- virtual TYPE get_##NAME() const = 0; \
- virtual void set_##NAME(TYPE value) = 0
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class endianess_convertor {
- public:
- //------------------------------------------------------------------------------
- endianess_convertor() { need_conversion = false; }
-
- //------------------------------------------------------------------------------
- void setup(unsigned char elf_file_encoding) {
- need_conversion = (elf_file_encoding != get_host_encoding());
- }
-
- //------------------------------------------------------------------------------
- uint64_t operator()(uint64_t value) const {
- if (!need_conversion) {
- return value;
- }
- value = ((value & 0x00000000000000FFull) << 56) | ((value & 0x000000000000FF00ull) << 40) |
- ((value & 0x0000000000FF0000ull) << 24) | ((value & 0x00000000FF000000ull) << 8) |
- ((value & 0x000000FF00000000ull) >> 8) | ((value & 0x0000FF0000000000ull) >> 24) |
- ((value & 0x00FF000000000000ull) >> 40) | ((value & 0xFF00000000000000ull) >> 56);
-
- return value;
- }
-
- //------------------------------------------------------------------------------
- int64_t operator()(int64_t value) const {
- if (!need_conversion) {
- return value;
- }
- return (int64_t)(*this)((uint64_t)value);
- }
-
- //------------------------------------------------------------------------------
- uint32_t operator()(uint32_t value) const {
- if (!need_conversion) {
- return value;
- }
- value = ((value & 0x000000FF) << 24) | ((value & 0x0000FF00) << 8) |
- ((value & 0x00FF0000) >> 8) | ((value & 0xFF000000) >> 24);
-
- return value;
- }
-
- //------------------------------------------------------------------------------
- int32_t operator()(int32_t value) const {
- if (!need_conversion) {
- return value;
- }
- return (int32_t)(*this)((uint32_t)value);
- }
-
- //------------------------------------------------------------------------------
- uint16_t operator()(uint16_t value) const {
- if (!need_conversion) {
- return value;
- }
- value = ((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8);
-
- return value;
- }
-
- //------------------------------------------------------------------------------
- int16_t operator()(int16_t value) const {
- if (!need_conversion) {
- return value;
- }
- return (int16_t)(*this)((uint16_t)value);
- }
-
- //------------------------------------------------------------------------------
- int8_t operator()(int8_t value) const { return value; }
-
- //------------------------------------------------------------------------------
- uint8_t operator()(uint8_t value) const { return value; }
-
- //------------------------------------------------------------------------------
- private:
- //------------------------------------------------------------------------------
- unsigned char get_host_encoding() const {
- static const int tmp = 1;
- if (1 == *(char*)&tmp) {
- return ELFDATA2LSB;
- } else {
- return ELFDATA2MSB;
- }
- }
-
- //------------------------------------------------------------------------------
- private:
- bool need_conversion;
-};
-
-
-//------------------------------------------------------------------------------
-inline uint32_t elf_hash(const unsigned char* name) {
- uint32_t h = 0, g;
- while (*name) {
- h = (h << 4) + *name++;
- g = h & 0xf0000000;
- if (g != 0) h ^= g >> 24;
- h &= ~g;
- }
- return h;
-}
-
-} // namespace ELFIO
-
-#endif // ELFIO_UTILS_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/functional_grid_launch.hpp b/third_party/rocm/include/hip/hcc_detail/functional_grid_launch.hpp
deleted file mode 100644
index efe6a60..0000000
--- a/third_party/rocm/include/hip/hcc_detail/functional_grid_launch.hpp
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "concepts.hpp"
-#include "helpers.hpp"
-#include "program_state.hpp"
-#include "hip_runtime_api.h"
-
-#include <cstdint>
-#include <cstring>
-#include <stdexcept>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-
-hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
- unsigned int flags, hip_impl::program_state& ps);
-
-hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim,
- dim3 blockDim, void** args,
- size_t sharedMem, hipStream_t stream,
- hip_impl::program_state& ps);
-
-hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
- int numDevices,
- unsigned int flags,
- hip_impl::program_state& ps);
-
-#pragma GCC visibility push(hidden)
-
-namespace hip_impl {
-template <typename T, typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
-inline T round_up_to_next_multiple_nonnegative(T x, T y) {
- T tmp = x + y - 1;
- return tmp - tmp % y;
-}
-
-template <
- std::size_t n,
- typename... Ts,
- typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
-inline hip_impl::kernarg make_kernarg(
- const std::tuple<Ts...>&,
- const kernargs_size_align&,
- hip_impl::kernarg kernarg) {
- return kernarg;
-}
-
-template <
- std::size_t n,
- typename... Ts,
- typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
-inline hip_impl::kernarg make_kernarg(
- const std::tuple<Ts...>& formals,
- const kernargs_size_align& size_align,
- hip_impl::kernarg kernarg) {
- using T = typename std::tuple_element<n, std::tuple<Ts...>>::type;
-
- static_assert(
- !std::is_reference<T>{},
- "A __global__ function cannot have a reference as one of its "
- "arguments.");
- #if defined(HIP_STRICT)
- static_assert(
- std::is_trivially_copyable<T>{},
- "Only TriviallyCopyable types can be arguments to a __global__ "
- "function");
- #endif
-
- kernarg.resize(round_up_to_next_multiple_nonnegative(
- kernarg.size(), size_align.alignment(n)) + size_align.size(n));
-
- std::memcpy(
- kernarg.data() + kernarg.size() - size_align.size(n),
- &std::get<n>(formals),
- size_align.size(n));
- return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
-}
-
-template <typename... Formals, typename... Actuals>
-inline hip_impl::kernarg make_kernarg(
- void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
- static_assert(sizeof...(Formals) == sizeof...(Actuals),
- "The count of formal arguments must match the count of actuals.");
-
- if (sizeof...(Formals) == 0) return {};
-
- std::tuple<Formals...> to_formals{std::move(actuals)};
- hip_impl::kernarg kernarg;
- kernarg.reserve(sizeof(to_formals));
-
- auto& ps = hip_impl::get_program_state();
- return make_kernarg<0>(to_formals,
- ps.get_kernargs_size_align(
- reinterpret_cast<std::uintptr_t>(kernel)),
- std::move(kernarg));
-}
-
-
-HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(hipStream_t stream);
-
-inline
-__attribute__((visibility("hidden")))
-void hipLaunchKernelGGLImpl(
- std::uintptr_t function_address,
- const dim3& numBlocks,
- const dim3& dimBlocks,
- std::uint32_t sharedMemBytes,
- hipStream_t stream,
- void** kernarg) {
-
- const auto& kd = hip_impl::get_program_state().kernel_descriptor(function_address,
- target_agent(stream));
-
- hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z,
- dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
- stream, nullptr, kernarg);
-}
-} // Namespace hip_impl.
-
-
-template <class T>
-inline
-hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
- T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) {
-
- using namespace hip_impl;
-
- hip_impl::hip_init();
- auto f = get_program_state().kernel_descriptor(reinterpret_cast<std::uintptr_t>(kernel),
- target_agent(0));
-
- return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
- dynSharedMemPerBlk, blockSizeLimit);
-}
-
-template <class T>
-inline
-hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
- T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) {
-
- using namespace hip_impl;
-
- hip_impl::hip_init();
- if(flags != hipOccupancyDefault) return hipErrorNotSupported;
- auto f = get_program_state().kernel_descriptor(reinterpret_cast<std::uintptr_t>(kernel),
- target_agent(0));
-
- return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
- dynSharedMemPerBlk, blockSizeLimit);
-}
-
-template <typename... Args, typename F = void (*)(Args...)>
-inline
-void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
- std::uint32_t sharedMemBytes, hipStream_t stream,
- Args... args) {
- hip_impl::hip_init();
- auto kernarg = hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
- std::size_t kernarg_size = kernarg.size();
-
- void* config[]{
- HIP_LAUNCH_PARAM_BUFFER_POINTER,
- kernarg.data(),
- HIP_LAUNCH_PARAM_BUFFER_SIZE,
- &kernarg_size,
- HIP_LAUNCH_PARAM_END};
-
- hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast<std::uintptr_t>(kernel),
- numBlocks, dimBlocks, sharedMemBytes,
- stream, &config[0]);
-}
-
-template <typename F>
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim,
- void** args, size_t sharedMem,
- hipStream_t stream) {
- hip_impl::hip_init();
- auto& ps = hip_impl::get_program_state();
- return hipLaunchCooperativeKernel(reinterpret_cast<void*>(f), gridDim,
- blockDim, args, sharedMem, stream, ps);
-}
-
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
- int numDevices,
- unsigned int flags) {
-
- hip_impl::hip_init();
- auto& ps = hip_impl::get_program_state();
- return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps);
-}
-
-#pragma GCC visibility pop
diff --git a/third_party/rocm/include/hip/hcc_detail/grid_launch.h b/third_party/rocm/include/hip/hcc_detail/grid_launch.h
deleted file mode 100644
index 22841a5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/grid_launch.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-#include <hc_defines.h>
-
-#define GRID_LAUNCH_VERSION 20
-
-// Extern definitions
-namespace hc{
-class completion_future;
-class accelerator_view;
-}
-
-
-// 3 dim structure for groups and grids.
-typedef struct gl_dim3
-{
- int x,y,z;
- gl_dim3(uint32_t _x=1, uint32_t _y=1, uint32_t _z=1) : x(_x), y(_y), z(_z) {};
-} gl_dim3;
-
-typedef enum gl_barrier_bit {
- barrier_bit_queue_default,
- barrier_bit_none,
- barrier_bit_wait,
-} gl_barrier_bit;
-
-
-// grid_launch_parm contains information used to launch the kernel.
-typedef struct grid_launch_parm
-{
- //! Grid dimensions
- gl_dim3 grid_dim;
-
- //! Group dimensions
- gl_dim3 group_dim;
-
- //! Amount of dynamic group memory to use with the kernel launch.
- //! This memory is in addition to the amount used statically in the kernel.
- unsigned int dynamic_group_mem_bytes;
-
- //! Control setting of barrier bit on per-packet basis:
- //! See gl_barrier_bit description.
- //! Placeholder, is not used to control packet dispatch yet
- enum gl_barrier_bit barrier_bit;
-
- //! Value of packet fences to apply to launch.
- //! The correspond to the value of bits 9:14 in the AQL packet,
- //! see HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE and hsa_fence_scope_t.
- unsigned int launch_fence;
-
- //! Pointer to the accelerator_view where the kernel should execute.
- //! If NULL, the default view on the default accelerator is used.
- hc::accelerator_view *av;
-
- //! Pointer to the completion_future used to track the status of the command.
- //! If NULL, the command does not write status. In this case,
- //! synchronization can be enforced with queue-level waits or
- //! waiting on younger commands.
- hc::completion_future *cf;
-
- grid_launch_parm() = default;
-} grid_launch_parm;
-
-
-extern void init_grid_launch(grid_launch_parm *gl);
diff --git a/third_party/rocm/include/hip/hcc_detail/grid_launch.hpp b/third_party/rocm/include/hip/hcc_detail/grid_launch.hpp
deleted file mode 100644
index 04ce7e0..0000000
--- a/third_party/rocm/include/hip/hcc_detail/grid_launch.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#pragma once
-
-#include "grid_launch.h"
-#include "hc.hpp"
-
-class grid_launch_parm_cxx : public grid_launch_parm
-{
-public:
- grid_launch_parm_cxx() = default;
-
- // customized serialization: don't need av and cf in kernel
- __attribute__((annotate("serialize")))
- void __cxxamp_serialize(Kalmar::Serialize& s) const {
- s.Append(sizeof(int), &grid_dim.x);
- s.Append(sizeof(int), &grid_dim.y);
- s.Append(sizeof(int), &grid_dim.z);
- s.Append(sizeof(int), &group_dim.x);
- s.Append(sizeof(int), &group_dim.y);
- s.Append(sizeof(int), &group_dim.z);
- }
-
- __attribute__((annotate("user_deserialize")))
- grid_launch_parm_cxx(int grid_dim_x, int grid_dim_y, int grid_dim_z,
- int group_dim_x, int group_dim_y, int group_dim_z) {
- grid_dim.x = grid_dim_x;
- grid_dim.y = grid_dim_y;
- grid_dim.z = grid_dim_z;
- group_dim.x = group_dim_x;
- group_dim.y = group_dim_y;
- group_dim.z = group_dim_z;
- }
-};
-
-
-extern inline void grid_launch_init(grid_launch_parm *lp) {
- lp->grid_dim.x = lp->grid_dim.y = lp->grid_dim.z = 1;
-
- lp->group_dim.x = lp->group_dim.y = lp->group_dim.z = 1;
-
- lp->dynamic_group_mem_bytes = 0;
-
- lp->barrier_bit = barrier_bit_queue_default;
- lp->launch_fence = -1;
-
- // TODO - set to NULL?
- static hc::accelerator_view av = hc::accelerator().get_default_view();
- lp->av = &av;
- lp->cf = NULL;
-}
-
diff --git a/third_party/rocm/include/hip/hcc_detail/grid_launch_GGL.hpp b/third_party/rocm/include/hip/hcc_detail/grid_launch_GGL.hpp
deleted file mode 100644
index 1c05279..0000000
--- a/third_party/rocm/include/hip/hcc_detail/grid_launch_GGL.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#pragma once
-
-#if GENERIC_GRID_LAUNCH == 1
-#if __hcc_workweek__ >= 17481
-#include "functional_grid_launch.hpp"
-#else
-#include "macro_based_grid_launch.hpp"
-#endif
-#endif // GENERIC_GRID_LAUNCH
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/hcc_detail/helpers.hpp b/third_party/rocm/include/hip/hcc_detail/helpers.hpp
deleted file mode 100644
index b94b126..0000000
--- a/third_party/rocm/include/hip/hcc_detail/helpers.hpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-#include "concepts.hpp"
-
-#include <type_traits> // For std::conditional, std::decay, std::enable_if,
- // std::false_type, std result_of and std::true_type.
-#include <utility> // For std::declval.
-
-#ifdef __has_include // Check if __has_include is present
-# if __has_include(<version>) // Check for version header
-# include <version>
-# if defined(__cpp_lib_is_invocable) && !defined(HIP_HAS_INVOCABLE)
-# define HIP_HAS_INVOCABLE __cpp_lib_is_invocable
-# endif
-# if defined(__cpp_lib_result_of_sfinae) && !defined(HIP_HAS_RESULT_OF_SFINAE)
-# define HIP_HAS_RESULT_OF_SFINAE __cpp_lib_result_of_sfinae
-# endif
-# endif
-#endif
-
-#ifndef HIP_HAS_INVOCABLE
-#define HIP_HAS_INVOCABLE 0
-#endif
-
-#ifndef HIP_HAS_RESULT_OF_SFINAE
-#define HIP_HAS_RESULT_OF_SFINAE 0
-#endif
-
-namespace std { // TODO: these should be removed as soon as possible.
-#if (__cplusplus < 201406L)
-#if (__cplusplus < 201402L)
-template <bool cond, typename T = void>
-using enable_if_t = typename enable_if<cond, T>::type;
-template <bool cond, typename T, typename U>
-using conditional_t = typename conditional<cond, T, U>::type;
-template <typename T>
-using decay_t = typename decay<T>::type;
-template <FunctionalProcedure F, typename... Ts>
-using result_of_t = typename result_of<F(Ts...)>::type;
-template <typename T>
-using remove_reference_t = typename remove_reference<T>::type;
-#endif
-#endif
-} // namespace std
-
-namespace hip_impl {
-template <typename...>
-using void_t_ = void;
-
-#if HIP_HAS_INVOCABLE
-template <typename, typename = void>
-struct is_callable_impl;
-
-template <FunctionalProcedure F, typename... Ts>
-struct is_callable_impl<F(Ts...)> : std::is_invocable<F, Ts...> {};
-#elif HIP_HAS_RESULT_OF_SFINAE
-template <typename, typename = void>
-struct is_callable_impl : std::false_type {};
-
-template <FunctionalProcedure F, typename... Ts>
-struct is_callable_impl<F(Ts...), void_t_<typename std::result_of<F(Ts...)>::type > > : std::true_type {};
-#else
-template <class Base, class T, class Derived>
-auto simple_invoke(T Base::*pmd, Derived&& ref)
--> decltype(static_cast<Derived&&>(ref).*pmd);
-
-template <class PMD, class Pointer>
-auto simple_invoke(PMD&& pmd, Pointer&& ptr)
--> decltype((*static_cast<Pointer&&>(ptr)).*static_cast<PMD&&>(pmd));
-
-template <class Base, class T, class Derived>
-auto simple_invoke(T Base::*pmd, const std::reference_wrapper<Derived>& ref)
--> decltype(ref.get().*pmd);
-
-template <class Base, class T, class Derived, class... Args>
-auto simple_invoke(T Base::*pmf, Derived&& ref, Args&&... args)
--> decltype((static_cast<Derived&&>(ref).*pmf)(static_cast<Args&&>(args)...));
-
-template <class PMF, class Pointer, class... Args>
-auto simple_invoke(PMF&& pmf, Pointer&& ptr, Args&&... args)
--> decltype(((*static_cast<Pointer&&>(ptr)).*static_cast<PMF&&>(pmf))(static_cast<Args&&>(args)...));
-
-template <class Base, class T, class Derived, class... Args>
-auto simple_invoke(T Base::*pmf, const std::reference_wrapper<Derived>& ref, Args&&... args)
--> decltype((ref.get().*pmf)(static_cast<Args&&>(args)...));
-
-template<class F, class... Ts>
-auto simple_invoke(F&& f, Ts&&... xs)
--> decltype(f(static_cast<Ts&&>(xs)...));
-
-template <typename, typename = void>
-struct is_callable_impl : std::false_type {};
-
-template <FunctionalProcedure F, typename... Ts>
-struct is_callable_impl<F(Ts...), void_t_<decltype(simple_invoke(std::declval<F>(), std::declval<Ts>()...))> >
- : std::true_type {};
-
-#endif
-
-template <typename Call>
-struct is_callable : is_callable_impl<Call> {};
-
-#define count_macro_args_impl_hip_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
- _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, \
- _26, _27, _28, _29, _30, _31, _n, ...) \
- _n
-#define count_macro_args_hip_(...) \
- count_macro_args_impl_hip_(, ##__VA_ARGS__, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \
- 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, \
- 0)
-
-#define overloaded_macro_expand_hip_(macro, arg_cnt) macro##arg_cnt
-#define overload_macro_impl_hip_(macro, arg_cnt) overloaded_macro_expand_hip_(macro, arg_cnt)
-#define overload_macro_hip_(macro, ...) \
- overload_macro_impl_hip_(macro, count_macro_args_hip_(__VA_ARGS__))(__VA_ARGS__)
-} // namespace hip_impl
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_atomic.h b/third_party/rocm/include/hip/hcc_detail/hip_atomic.h
deleted file mode 100644
index a1370ce..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_atomic.h
+++ /dev/null
@@ -1,286 +0,0 @@
-#pragma once
-
-#include "device_functions.h"
-
-// Relaxed, strong compare-and-swap overloads.
-// Each one stores `val` to `*address` only if `*address == compare`, and returns
-// the value that was observed at `*address` (the builtin writes the observed
-// value back into the expected slot when the comparison fails).
-__device__
-inline
-int atomicCAS(int* address, int compare, int val)
-{
-    int expected = compare;
-    __atomic_compare_exchange_n(
-        address, &expected, val, /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-    return expected;
-}
-
-__device__
-inline
-unsigned int atomicCAS(
-    unsigned int* address, unsigned int compare, unsigned int val)
-{
-    unsigned int expected = compare;
-    __atomic_compare_exchange_n(
-        address, &expected, val, /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-    return expected;
-}
-
-__device__
-inline
-unsigned long long atomicCAS(
-    unsigned long long* address,
-    unsigned long long compare,
-    unsigned long long val)
-{
-    unsigned long long expected = compare;
-    __atomic_compare_exchange_n(
-        address, &expected, val, /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-    return expected;
-}
-
-// Relaxed atomic fetch-and-add overloads. Each returns the value that was held
-// at `*address` before `val` was added.
-__device__
-inline
-int atomicAdd(int* address, int val)
-{
-    const int previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicAdd(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned long long atomicAdd(
-    unsigned long long* address, unsigned long long val)
-{
-    const unsigned long long previous =
-        __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-float atomicAdd(float* address, float val)
-{
-    const float previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-// Deprecated float add that returns nothing; callers are told to use atomicAdd.
-// Forwards directly to __ockl_atomic_add_noret_f32 (declared elsewhere).
-DEPRECATED("use atomicAdd instead")
-__device__
-inline
-void atomicAddNoRet(float* address, float val)
-{
- __ockl_atomic_add_noret_f32(address, val);
-}
-
-// Double-precision atomic add emulated with a 64-bit compare-and-swap loop.
-// Returns the value stored at `*address` before the addition took effect.
-__device__
-inline
-double atomicAdd(double* address, double val)
-{
- // Work on the raw 64-bit pattern so the unsigned long long atomicCAS can be used.
- unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
- unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
-
- unsigned long long old;
- do {
- old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
-
- // The location changed since `r` was read: adopt the fresh value and retry.
- if (r != old) { r = old; continue; }
-
- // Try to publish (current + val); atomicCAS returns the value it observed.
- r = atomicCAS(
- uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
-
- // The observed value is the one the sum was computed from, so the store landed.
- if (r == old) break;
- } while (true);
-
- return __longlong_as_double(r);
-}
-
-// Relaxed atomic fetch-and-subtract overloads. Each returns the value that was
-// held at `*address` before `val` was subtracted.
-__device__
-inline
-int atomicSub(int* address, int val)
-{
-    const int previous = __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicSub(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-// Relaxed atomic exchange overloads. Each stores `val` to `*address` and
-// returns the value that was there before.
-__device__
-inline
-int atomicExch(int* address, int val)
-{
-    const int previous = __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicExch(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
-{
-    const unsigned long long previous =
-        __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-// The float overload exchanges the 32-bit pattern through an unsigned int view
-// of the same address and converts the previous pattern back to float.
-__device__
-inline
-float atomicExch(float* address, float val)
-{
-    unsigned int* const bits = reinterpret_cast<unsigned int*>(address);
-    const unsigned int previous_bits =
-        __atomic_exchange_n(bits, __float_as_uint(val), __ATOMIC_RELAXED);
-    return __uint_as_float(previous_bits);
-}
-
-// Relaxed atomic minimum for 32-bit integers, implemented with the
-// __atomic_fetch_min builtin; the builtin's result is returned unchanged.
-__device__
-inline
-int atomicMin(int* address, int val)
-{
-    const int previous = __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicMin(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-// 64-bit unsigned atomic minimum built on atomicCAS.
-//
-// Stores `val` to `*address` if it is smaller than the current contents and
-// returns the value that `*address` held before this call took effect. If
-// `val` is not smaller, nothing is written and the observed value is returned.
-//
-// The previous implementation kept looping after a successful CAS, re-read its
-// own store, and therefore returned `val` instead of the old value.
-__device__
-inline
-unsigned long long atomicMin(
-    unsigned long long* address, unsigned long long val)
-{
-    unsigned long long observed{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (val < observed) {
-        const unsigned long long previous = atomicCAS(address, observed, val);
-
-        // CAS succeeded: `observed` is the pre-update value, so stop here.
-        if (previous == observed) break;
-
-        // Lost the race: retry against the value another thread installed.
-        observed = previous;
-    }
-
-    return observed;
-}
-
-// Relaxed atomic maximum for 32-bit integers, implemented with the
-// __atomic_fetch_max builtin; the builtin's result is returned unchanged.
-__device__
-inline
-int atomicMax(int* address, int val)
-{
-    const int previous = __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicMax(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-// 64-bit unsigned atomic maximum built on atomicCAS.
-//
-// Stores `val` to `*address` if it is larger than the current contents and
-// returns the value that `*address` held before this call took effect. If
-// `val` is not larger, nothing is written and the observed value is returned.
-//
-// The previous implementation kept looping after a successful CAS, re-read its
-// own store, and therefore returned `val` instead of the old value.
-__device__
-inline
-unsigned long long atomicMax(
-    unsigned long long* address, unsigned long long val)
-{
-    unsigned long long observed{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (observed < val) {
-        const unsigned long long previous = atomicCAS(address, observed, val);
-
-        // CAS succeeded: `observed` is the pre-update value, so stop here.
-        if (previous == observed) break;
-
-        // Lost the race: retry against the value another thread installed.
-        observed = previous;
-    }
-
-    return observed;
-}
-
-// Atomic increment of an unsigned int, forwarded to an AMDGCN intrinsic.
-// NOTE(review): presumably follows CUDA atomicInc semantics (wraps to 0 once the
-// old value reaches `val`) and returns the old value -- confirm against the
-// intrinsic's documentation.
-__device__
-inline
-unsigned int atomicInc(unsigned int* address, unsigned int val)
-{
- // Local declaration whose symbol name is bound, via the asm label, to the
- // LLVM intrinsic llvm.amdgcn.atomic.inc.i32.p0i32.
- __device__
- extern
- unsigned int __builtin_amdgcn_atomic_inc(
- unsigned int*,
- unsigned int,
- unsigned int,
- unsigned int,
- bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");
-
- // Arguments: pointer, operand, relaxed ordering, scope 1 (device), and a
- // trailing flag passed as false (presumably "volatile" -- TODO confirm).
- return __builtin_amdgcn_atomic_inc(
- address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
-}
-
-// Atomic decrement of an unsigned int, forwarded to an AMDGCN intrinsic.
-// NOTE(review): presumably follows CUDA atomicDec semantics (wraps to `val` when
-// the old value is 0 or greater than `val`) and returns the old value -- confirm
-// against the intrinsic's documentation.
-__device__
-inline
-unsigned int atomicDec(unsigned int* address, unsigned int val)
-{
- // Local declaration whose symbol name is bound, via the asm label, to the
- // LLVM intrinsic llvm.amdgcn.atomic.dec.i32.p0i32.
- __device__
- extern
- unsigned int __builtin_amdgcn_atomic_dec(
- unsigned int*,
- unsigned int,
- unsigned int,
- unsigned int,
- bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");
-
- // Arguments: pointer, operand, relaxed ordering, scope 1 (device), and a
- // trailing flag passed as false (presumably "volatile" -- TODO confirm).
- return __builtin_amdgcn_atomic_dec(
- address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
-}
-
-// Relaxed atomic bitwise-AND overloads. Each returns the value that was held
-// at `*address` before the operation.
-__device__
-inline
-int atomicAnd(int* address, int val)
-{
-    const int previous = __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicAnd(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned long long atomicAnd(
-    unsigned long long* address, unsigned long long val)
-{
-    const unsigned long long previous =
-        __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-// Relaxed atomic bitwise-OR overloads. Each returns the value that was held
-// at `*address` before the operation.
-__device__
-inline
-int atomicOr(int* address, int val)
-{
-    const int previous = __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicOr(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned long long atomicOr(
-    unsigned long long* address, unsigned long long val)
-{
-    const unsigned long long previous =
-        __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-// Relaxed atomic bitwise-XOR overloads. Each returns the value that was held
-// at `*address` before the operation.
-__device__
-inline
-int atomicXor(int* address, int val)
-{
-    const int previous = __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned int atomicXor(unsigned int* address, unsigned int val)
-{
-    const unsigned int previous = __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-__device__
-inline
-unsigned long long atomicXor(
-    unsigned long long* address, unsigned long long val)
-{
-    const unsigned long long previous =
-        __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-    return previous;
-}
-
-// TODO: add scoped atomics i.e. atomic{*}_system && atomic{*}_block.
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_common.h b/third_party/rocm/include/hip/hcc_detail/hip_common.h
deleted file mode 100644
index 2e2abac..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_common.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMMON_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMMON_H
-
-// Compiler classification flags. All three macros are always defined (0 or 1):
-//   __HCC_ONLY__          - compiling with HCC (__HCC__ defined)
-//   __HIP_CLANG_ONLY__    - compiling with clang in HIP mode, and not HCC
-//   __HCC_OR_HIP_CLANG__  - either of the above
-#ifdef __HCC__
-#define __HCC_ONLY__ 1
-#define __HIP_CLANG_ONLY__ 0
-#define __HCC_OR_HIP_CLANG__ 1
-#elif defined(__clang__) && defined(__HIP__)
-#define __HCC_ONLY__ 0
-#define __HIP_CLANG_ONLY__ 1
-#define __HCC_OR_HIP_CLANG__ 1
-#else
-#define __HCC_ONLY__ 0
-#define __HIP_CLANG_ONLY__ 0
-#define __HCC_OR_HIP_CLANG__ 0
-#endif
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMMON_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_complex.h b/third_party/rocm/include/hip/hcc_detail/hip_complex.h
deleted file mode 100644
index 11648ce..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_complex.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H
-
-#include "hip/hcc_detail/hip_vector_types.h"
-
-// TODO: Clang has a bug which allows device functions to call std functions
-// when std functions are introduced into default namespace by using statement.
-// math.h may be included after this bug is fixed.
-#if __cplusplus
-#include <cmath>
-#else
-#include "math.h"
-#endif
-
-// C++-only operator generators. Each macro expands to one free operator
-// function for `type`, which must expose `.x` (real part) and `.y` (imaginary
-// part) members.
-#if __cplusplus
-// Unary minus: negates both components.
-#define COMPLEX_NEG_OP_OVERLOAD(type) \
- __device__ __host__ static inline type operator-(const type& op) { \
- type ret; \
- ret.x = -op.x; \
- ret.y = -op.y; \
- return ret; \
- }
-
-// Equality: both components must compare equal.
-#define COMPLEX_EQ_OP_OVERLOAD(type) \
- __device__ __host__ static inline bool operator==(const type& lhs, const type& rhs) { \
- return lhs.x == rhs.x && lhs.y == rhs.y; \
- }
-
-// Inequality: defined as the negation of operator==.
-#define COMPLEX_NE_OP_OVERLOAD(type) \
- __device__ __host__ static inline bool operator!=(const type& lhs, const type& rhs) { \
- return !(lhs == rhs); \
- }
-
-// Component-wise addition.
-#define COMPLEX_ADD_OP_OVERLOAD(type) \
- __device__ __host__ static inline type operator+(const type& lhs, const type& rhs) { \
- type ret; \
- ret.x = lhs.x + rhs.x; \
- ret.y = lhs.y + rhs.y; \
- return ret; \
- }
-
-// Component-wise subtraction.
-#define COMPLEX_SUB_OP_OVERLOAD(type) \
- __device__ __host__ static inline type operator-(const type& lhs, const type& rhs) { \
- type ret; \
- ret.x = lhs.x - rhs.x; \
- ret.y = lhs.y - rhs.y; \
- return ret; \
- }
-
-// Complex multiplication: (a+bi)(c+di) = (ac - bd) + (ad + bc)i.
-#define COMPLEX_MUL_OP_OVERLOAD(type) \
- __device__ __host__ static inline type operator*(const type& lhs, const type& rhs) { \
- type ret; \
- ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \
- ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \
- return ret; \
- }
-
-// Complex division: multiplies by the conjugate of rhs, then divides both
-// components by |rhs|^2. No guard against a zero divisor.
-#define COMPLEX_DIV_OP_OVERLOAD(type) \
- __device__ __host__ static inline type operator/(const type& lhs, const type& rhs) { \
- type ret; \
- ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \
- ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \
- ret.x = ret.x / (rhs.x * rhs.x + rhs.y * rhs.y); \
- ret.y = ret.y / (rhs.x * rhs.x + rhs.y * rhs.y); \
- return ret; \
- }
-
-// In-place component-wise addition; returns lhs.
-#define COMPLEX_ADD_PREOP_OVERLOAD(type) \
- __device__ __host__ static inline type& operator+=(type& lhs, const type& rhs) { \
- lhs.x += rhs.x; \
- lhs.y += rhs.y; \
- return lhs; \
- }
-
-// In-place component-wise subtraction; returns lhs.
-#define COMPLEX_SUB_PREOP_OVERLOAD(type) \
- __device__ __host__ static inline type& operator-=(type& lhs, const type& rhs) { \
- lhs.x -= rhs.x; \
- lhs.y -= rhs.y; \
- return lhs; \
- }
-
-// In-place multiplication, expressed through operator*.
-#define COMPLEX_MUL_PREOP_OVERLOAD(type) \
- __device__ __host__ static inline type& operator*=(type& lhs, const type& rhs) { \
- lhs = lhs * rhs; \
- return lhs; \
- }
-
-// In-place division, expressed through operator/.
-#define COMPLEX_DIV_PREOP_OVERLOAD(type) \
- __device__ __host__ static inline type& operator/=(type& lhs, const type& rhs) { \
- lhs = lhs / rhs; \
- return lhs; \
- }
-
-// Complex-times-scalar product: scales both components by a `type1` value.
-// Only the (complex, scalar) argument order is generated.
-#define COMPLEX_SCALAR_PRODUCT(type, type1) \
- __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \
- type ret; \
- ret.x = lhs.x * rhs; \
- ret.y = lhs.y * rhs; \
- return ret; \
- }
-
-#endif
-
-// Single-precision complex number: `.x` is the real part, `.y` the imaginary part.
-typedef float2 hipFloatComplex;
-
-// Real part of z.
-__device__ __host__ static inline float hipCrealf(hipFloatComplex z) {
-    return z.x;
-}
-
-// Imaginary part of z.
-__device__ __host__ static inline float hipCimagf(hipFloatComplex z) {
-    return z.y;
-}
-
-// Builds a complex value from real part `a` and imaginary part `b`.
-__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
-    hipFloatComplex result;
-    result.x = a;
-    result.y = b;
-    return result;
-}
-
-// Complex conjugate: same real part, negated imaginary part.
-__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
-    hipFloatComplex conj;
-    conj.y = -z.y;
-    conj.x = z.x;
-    return conj;
-}
-
-// Squared magnitude |z|^2.
-__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) {
-    const float norm2 = z.x * z.x + z.y * z.y;
-    return norm2;
-}
-
-// Component-wise sum p + q.
-__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
-    const float re = p.x + q.x;
-    const float im = p.y + q.y;
-    return make_hipFloatComplex(re, im);
-}
-
-// Component-wise difference p - q.
-__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
-    const float re = p.x - q.x;
-    const float im = p.y - q.y;
-    return make_hipFloatComplex(re, im);
-}
-
-// Complex product p * q.
-__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
-    const float re = p.x * q.x - p.y * q.y;
-    const float im = p.y * q.x + p.x * q.y;
-    return make_hipFloatComplex(re, im);
-}
-
-// Complex quotient p / q: numerator is p * conj(q), denominator is |q|^2.
-// No guard against q == 0.
-__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
-    const float denom = hipCsqabsf(q);
-    hipFloatComplex quotient;
-    quotient.x = (p.x * q.x + p.y * q.y) / denom;
-    quotient.y = (p.y * q.x - p.x * q.y) / denom;
-    return quotient;
-}
-
-// Magnitude |z|, computed as sqrtf of the squared magnitude.
-__device__ __host__ static inline float hipCabsf(hipFloatComplex z) {
-    const float norm2 = hipCsqabsf(z);
-    return sqrtf(norm2);
-}
-
-
-// Double-precision complex number: `.x` is the real part, `.y` the imaginary part.
-typedef double2 hipDoubleComplex;
-
-// Real part of z.
-__device__ __host__ static inline double hipCreal(hipDoubleComplex z) {
-    return z.x;
-}
-
-// Imaginary part of z.
-__device__ __host__ static inline double hipCimag(hipDoubleComplex z) {
-    return z.y;
-}
-
-// Builds a complex value from real part `a` and imaginary part `b`.
-__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
-    hipDoubleComplex result;
-    result.x = a;
-    result.y = b;
-    return result;
-}
-
-// Complex conjugate: same real part, negated imaginary part.
-__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
-    hipDoubleComplex conj;
-    conj.y = -z.y;
-    conj.x = z.x;
-    return conj;
-}
-
-// Squared magnitude |z|^2.
-__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) {
-    const double norm2 = z.x * z.x + z.y * z.y;
-    return norm2;
-}
-
-// Component-wise sum p + q.
-__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
-    const double re = p.x + q.x;
-    const double im = p.y + q.y;
-    return make_hipDoubleComplex(re, im);
-}
-
-// Component-wise difference p - q.
-__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
-    const double re = p.x - q.x;
-    const double im = p.y - q.y;
-    return make_hipDoubleComplex(re, im);
-}
-
-// Complex product p * q.
-__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
-    const double re = p.x * q.x - p.y * q.y;
-    const double im = p.y * q.x + p.x * q.y;
-    return make_hipDoubleComplex(re, im);
-}
-
-// Complex quotient p / q: numerator is p * conj(q), denominator is |q|^2.
-// No guard against q == 0.
-__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
-    const double denom = hipCsqabs(q);
-    hipDoubleComplex quotient;
-    quotient.x = (p.x * q.x + p.y * q.y) / denom;
-    quotient.y = (p.y * q.x - p.x * q.y) / denom;
-    return quotient;
-}
-
-// Magnitude |z| of a double-precision complex number.
-// Uses the double-precision sqrt: the previous sqrtf call converted the squared
-// magnitude to float first, losing precision (and range) in a function that
-// takes and returns double.
-__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); }
-
-
-// C++ only: instantiate the operator generators defined above for both complex
-// types. Each type gets unary minus, ==, !=, the four arithmetic operators, the
-// four compound-assignment operators, and complex * scalar for each listed
-// scalar type.
-#if __cplusplus
-
-// Operators for hipFloatComplex.
-COMPLEX_NEG_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_EQ_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_NE_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_ADD_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_SUB_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_MUL_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_DIV_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_ADD_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_SUB_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_MUL_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_DIV_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned short)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed short)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned int)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed int)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, float)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, double)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long long)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long long)
-
-// Operators for hipDoubleComplex.
-COMPLEX_NEG_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_EQ_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_NE_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_ADD_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_SUB_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_MUL_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_DIV_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_ADD_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_SUB_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_MUL_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_DIV_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned short)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed short)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned int)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed int)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, float)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, double)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long long)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long)
-
-#endif
-
-
-// `hipComplex` is an alias for the single-precision complex type.
-typedef hipFloatComplex hipComplex;
-
-// Builds a hipComplex from real part `x` and imaginary part `y`.
-__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) {
-    const hipComplex result = make_hipFloatComplex(x, y);
-    return result;
-}
-
-// Narrows a double-precision complex value to single precision, component by component.
-__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
-    const float re = (float)z.x;
-    const float im = (float)z.y;
-    return make_hipFloatComplex(re, im);
-}
-
-// Widens a single-precision complex value to double precision, component by component.
-__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
-    const double re = (double)z.x;
-    const double im = (double)z.y;
-    return make_hipDoubleComplex(re, im);
-}
-
-// Complex multiply-add in single precision: returns p * q + r.
-// The result is accumulated in two steps per component; the statement order is
-// kept as-is because regrouping could change floating-point rounding.
-__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
- // First partial products plus the addend r.
- float real = (p.x * q.x) + r.x;
- float imag = (q.x * p.y) + r.y;
-
- // Fold in the cross terms: real -= p.y*q.y, imag += p.x*q.y.
- real = -(p.y * q.y) + real;
- imag = (p.x * q.y) + imag;
-
- return make_hipComplex(real, imag);
-}
-
-// Complex multiply-add in double precision: returns p * q + r.
-// The result is accumulated in two steps per component; the statement order is
-// kept as-is because regrouping could change floating-point rounding.
-__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
- hipDoubleComplex r) {
- // First partial products plus the addend r.
- double real = (p.x * q.x) + r.x;
- double imag = (q.x * p.y) + r.y;
-
- // Fold in the cross terms: real -= p.y*q.y, imag += p.x*q.y.
- real = -(p.y * q.y) + real;
- imag = (p.x * q.y) + imag;
-
- return make_hipDoubleComplex(real, imag);
-}
-
-#endif //HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups.h b/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups.h
deleted file mode 100644
index 353bdc5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/hip_cooperative_groups.h
- *
- * @brief Device side implementation of `Cooperative Group` feature.
- *
- * Defines new types and device API wrappers related to the `Cooperative Group`
- * feature, which programmers can use directly in their kernel(s) in order to
- * make use of this feature.
- */
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-
-#if __cplusplus
-#include <hip/hcc_detail/hip_cooperative_groups_helper.h>
-
-namespace cooperative_groups {
-
-/** \brief The base type of all cooperative group types
- *
- * \details Holds the key properties of a constructed cooperative group type
- * object, like the group type, its size, etc
- */
-class thread_group {
- protected:
-    uint32_t _type;  // thread_group type
-    uint32_t _size;  // total number of threads in the thread_group
-    uint64_t _mask;  // Lane mask for coalesced and tiled partitioned group types;
-                     // LSB represents lane 0, and MSB represents lane 63
-
-    // Records the group type, its size and (optionally) its lane mask.
-    // A plain thread_group is constructed directly only when the group is meant
-    // to contain just the calling thread (through the API `this_thread()`); in
-    // every other case it is the base sub-object of a derived group type.
-    __CG_QUALIFIER__ thread_group(internal::group_type type, uint32_t size,
-                                  uint64_t mask = (uint64_t)0)
-        : _type(type), _size(size), _mask(mask) { }
-
- public:
-    // Total number of threads in the group. Every derived cooperative group
-    // type relies on this too, because the size is stored at construction.
-    __CG_QUALIFIER__ uint32_t size() const { return _size; }
-
-    // Rank of the calling thread within [0, size())
-    __CG_QUALIFIER__ uint32_t thread_rank() const;
-
-    // Is this cooperative group type valid?
-    __CG_QUALIFIER__ bool is_valid() const;
-
-    // Synchronize the threads in the thread group
-    __CG_QUALIFIER__ void sync() const;
-};
-
-/** \brief The multi-grid cooperative group type
- *
- * \details Represents an inter-device cooperative group type where the
- * participating threads within the group span multiple
- * devices, running the (same) kernel on these devices
- */
-// Group type tagged internal::cg_multi_grid. Every member simply forwards to
-// the matching function in internal::multi_grid.
-class multi_grid_group : public thread_group {
- // Only these friend functions are allowed to construct an object of this class
- // and access its resources
- friend __CG_QUALIFIER__ multi_grid_group this_multi_grid();
-
- protected:
- // Construct multi-grid thread group (through the API this_multi_grid())
- explicit __CG_QUALIFIER__ multi_grid_group(uint32_t size)
- : thread_group(internal::cg_multi_grid, size) { }
-
- public:
- // Number of invocations participating in this multi-grid group. In other
- // words, the number of GPUs
- __CG_QUALIFIER__ uint32_t num_grids() {
- return internal::multi_grid::num_grids();
- }
- // Rank of this invocation. In other words, an ID number within the range
- // [0, num_grids()) of the GPU, this kernel is running on
- __CG_QUALIFIER__ uint32_t grid_rank() {
- return internal::multi_grid::grid_rank();
- }
- // Rank of the calling thread, as reported by internal::multi_grid
- __CG_QUALIFIER__ uint32_t thread_rank() const {
- return internal::multi_grid::thread_rank();
- }
- // Validity of the group, as reported by internal::multi_grid
- __CG_QUALIFIER__ bool is_valid() const {
- return internal::multi_grid::is_valid();
- }
- // Synchronize through internal::multi_grid::sync()
- __CG_QUALIFIER__ void sync() const {
- internal::multi_grid::sync();
- }
-};
-
-/** \brief User exposed API interface to construct multi-grid cooperative
- * group type object - `multi_grid_group`
- *
- * \details Users are not allowed to directly construct an object of type
- * `multi_grid_group`. Instead, it must be constructed through this
- * API function
- */
-// Builds the multi-grid group, sized by internal::multi_grid::size().
-__CG_QUALIFIER__ multi_grid_group
-this_multi_grid() {
-    const uint32_t total_threads = internal::multi_grid::size();
-    return multi_grid_group(total_threads);
-}
-
-/** \brief The grid cooperative group type
- *
- * \details Represents an inter-workgroup cooperative group type where the
- * participating threads within the group spans across multiple
- * workgroups running the (same) kernel on the same device
- */
-// Group type tagged internal::cg_grid. Every member simply forwards to the
-// matching function in internal::grid.
-class grid_group : public thread_group {
- // Only these friend functions are allowed to construct an object of this class
- // and access its resources
- friend __CG_QUALIFIER__ grid_group this_grid();
-
- protected:
- // Construct grid thread group (through the API this_grid())
- explicit __CG_QUALIFIER__ grid_group(uint32_t size)
- : thread_group(internal::cg_grid, size) { }
-
- public:
- // Rank of the calling thread, as reported by internal::grid
- __CG_QUALIFIER__ uint32_t thread_rank() const {
- return internal::grid::thread_rank();
- }
- // Validity of the group, as reported by internal::grid
- __CG_QUALIFIER__ bool is_valid() const {
- return internal::grid::is_valid();
- }
- // Synchronize through internal::grid::sync()
- __CG_QUALIFIER__ void sync() const {
- internal::grid::sync();
- }
-};
-
-/** \brief User exposed API interface to construct grid cooperative group type
- * object - `grid_group`
- *
- * \details Users are not allowed to directly construct an object of type
- * `grid_group`. Instead, it must be constructed through this
- * API function
- */
-// Builds the grid group, sized by internal::grid::size().
-__CG_QUALIFIER__ grid_group
-this_grid() {
-    const uint32_t total_threads = internal::grid::size();
-    return grid_group(total_threads);
-}
-
-/** \brief The workgroup (thread-block in CUDA terminology) cooperative group
- * type
- *
- * \details Represents an intra-workgroup cooperative group type where the
- * participating threads within the group are exactly the same threads
- * that participate in the currently executing `workgroup`
- */
-// Group type tagged internal::cg_workgroup. Every member simply forwards to
-// the matching function in internal::workgroup.
-class thread_block : public thread_group {
- // Only these friend functions are allowed to construct an object of this
- // class and access its resources
- friend __CG_QUALIFIER__ thread_block this_thread_block();
-
- protected:
- // Construct a workgroup thread group (through the API this_thread_block())
- explicit __CG_QUALIFIER__ thread_block(uint32_t size)
- : thread_group(internal::cg_workgroup, size) { }
-
- public:
- // 3-dimensional block index within the grid
- __CG_QUALIFIER__ dim3 group_index() {
- return internal::workgroup::group_index();
- }
- // 3-dimensional thread index within the block
- __CG_QUALIFIER__ dim3 thread_index() {
- return internal::workgroup::thread_index();
- }
- // Rank of the calling thread, as reported by internal::workgroup
- __CG_QUALIFIER__ uint32_t thread_rank() const {
- return internal::workgroup::thread_rank();
- }
- // Validity of the group, as reported by internal::workgroup
- __CG_QUALIFIER__ bool is_valid() const {
- return internal::workgroup::is_valid();
- }
- // Synchronize through internal::workgroup::sync()
- __CG_QUALIFIER__ void sync() const {
- internal::workgroup::sync();
- }
-};
-
-/** \brief User exposed API interface to construct workgroup cooperative
- * group type object - `thread_block`
- *
- * \details Users are not allowed to directly construct an object of type
- * `thread_block`. Instead, it must be constructed through this API
- * function
- */
-// Builds the workgroup-level group, sized by internal::workgroup::size().
-__CG_QUALIFIER__ thread_block
-this_thread_block() {
-    const uint32_t total_threads = internal::workgroup::size();
-    return thread_block(total_threads);
-}
-
-/**
- * Implementation of all publicly exposed base class APIs
- */
-// Dispatches on the stored group type to the derived class's thread_rank().
-// An unrecognised type trips the assert and yields -1 converted to uint32_t.
-__CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
-    if (this->_type == internal::cg_multi_grid) {
-        return static_cast<const multi_grid_group*>(this)->thread_rank();
-    }
-    if (this->_type == internal::cg_grid) {
-        return static_cast<const grid_group*>(this)->thread_rank();
-    }
-    if (this->_type == internal::cg_workgroup) {
-        return static_cast<const thread_block*>(this)->thread_rank();
-    }
-
-    assert(false && "invalid cooperative group type");
-    return -1;
-}
-
-// Dispatches on the stored group type to the derived class's is_valid().
-// An unrecognised type trips the assert and reports false.
-__CG_QUALIFIER__ bool thread_group::is_valid() const {
-    if (this->_type == internal::cg_multi_grid) {
-        return static_cast<const multi_grid_group*>(this)->is_valid();
-    }
-    if (this->_type == internal::cg_grid) {
-        return static_cast<const grid_group*>(this)->is_valid();
-    }
-    if (this->_type == internal::cg_workgroup) {
-        return static_cast<const thread_block*>(this)->is_valid();
-    }
-
-    assert(false && "invalid cooperative group type");
-    return false;
-}
-
-// Dispatches on the stored group type to the derived class's sync().
-// An unrecognised type trips the assert and otherwise does nothing.
-__CG_QUALIFIER__ void thread_group::sync() const {
-    if (this->_type == internal::cg_multi_grid) {
-        static_cast<const multi_grid_group*>(this)->sync();
-    } else if (this->_type == internal::cg_grid) {
-        static_cast<const grid_group*>(this)->sync();
-    } else if (this->_type == internal::cg_workgroup) {
-        static_cast<const thread_block*>(this)->sync();
-    } else {
-        assert(false && "invalid cooperative group type");
-    }
-}
-
-/**
- * Implementation of publicly exposed `wrapper` APIs on top of basic cooperative
- * group type APIs
- */
-// Free-function form of g.size().
-template <typename CGTy>
-__CG_QUALIFIER__ uint32_t group_size(const CGTy& g) {
-    return g.size();
-}
-
-// Free-function form of g.thread_rank().
-template <typename CGTy>
-__CG_QUALIFIER__ uint32_t thread_rank(const CGTy& g) {
-    return g.thread_rank();
-}
-
-// Free-function form of g.is_valid().
-template <typename CGTy>
-__CG_QUALIFIER__ bool is_valid(const CGTy& g) {
-    return g.is_valid();
-}
-
-// Free-function form of g.sync().
-template <typename CGTy>
-__CG_QUALIFIER__ void sync(const CGTy& g) {
-    g.sync();
-}
-
-} // namespace cooperative_groups
-
-#endif // __cplusplus
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups_helper.h b/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups_helper.h
deleted file mode 100644
index 4e10c0d..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups_helper.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/hip_cooperative_groups_helper.h
- *
- * @brief Device side implementation of cooperative group feature.
- *
- * Defines helper constructs and APIs which aid the types and device API
- * wrappers defined within `hcc_detail/hip_cooperative_groups.h`.
- */
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
-
-#if __cplusplus
-#include <hip/hcc_detail/hip_runtime_api.h>
-#include <hip/hcc_detail/device_functions.h>
-
-#if !defined(__align__)
-#define __align__(x) __attribute__((aligned(x)))
-#endif
-
-#if !defined(__CG_QUALIFIER__)
-#define __CG_QUALIFIER__ __device__ __forceinline__
-#endif
-
-#if !defined(__CG_STATIC_QUALIFIER__)
-#define __CG_STATIC_QUALIFIER__ __device__ static __forceinline__
-#endif
-
-#if !defined(WAVEFRONT_SIZE)
-#define WAVEFRONT_SIZE 64
-#endif
-
-namespace cooperative_groups {
-
-namespace internal {
-
-/** \brief Enums representing different cooperative group types
- */
-typedef enum {
- cg_invalid,
- cg_multi_grid,
- cg_grid,
- cg_workgroup
-} group_type;
-
-/**
- * Functionalities related to multi-grid cooperative group type
- */
-namespace multi_grid {
-
-__CG_STATIC_QUALIFIER__ uint32_t num_grids() {
- return (uint32_t)__ockl_multi_grid_num_grids();
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t grid_rank() {
- return (uint32_t)__ockl_multi_grid_grid_rank();
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t size() {
- return (uint32_t)__ockl_multi_grid_size();
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- return (uint32_t)__ockl_multi_grid_thread_rank();
-}
-
-__CG_STATIC_QUALIFIER__ bool is_valid() {
- return (bool)__ockl_multi_grid_is_valid();
-}
-
-__CG_STATIC_QUALIFIER__ void sync() {
- __ockl_multi_grid_sync();
-}
-
-} // namespace multi_grid
-
-/**
- * Functionalities related to grid cooperative group type
- */
-namespace grid {
-
-__CG_STATIC_QUALIFIER__ uint32_t size() {
- return (uint32_t)((hipBlockDim_z * hipGridDim_z) *
- (hipBlockDim_y * hipGridDim_y) *
- (hipBlockDim_x * hipGridDim_x));
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- // Compute global id of the workgroup to which the current thread belongs to
- uint32_t blkIdx =
- (uint32_t)((hipBlockIdx_z * hipGridDim_y * hipGridDim_x) +
- (hipBlockIdx_y * hipGridDim_x) +
- (hipBlockIdx_x));
-
- // Compute total number of threads being passed to reach current workgroup
- // within grid
- uint32_t num_threads_till_current_workgroup =
- (uint32_t)(blkIdx * (hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
-
- // Compute thread local rank within current workgroup
- uint32_t local_thread_rank =
- (uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
- (hipThreadIdx_y * hipBlockDim_x) +
- (hipThreadIdx_x));
-
- return (num_threads_till_current_workgroup + local_thread_rank);
-}
-
-__CG_STATIC_QUALIFIER__ bool is_valid() {
- return (bool)__ockl_grid_is_valid();
-}
-
-__CG_STATIC_QUALIFIER__ void sync() {
- __ockl_grid_sync();
-}
-
-} // namespace grid
-
-/**
- * Functionalities related to `workgroup` (thread_block in CUDA terminology)
- * cooperative group type
- */
-namespace workgroup {
-
-__CG_STATIC_QUALIFIER__ dim3 group_index() {
- return (dim3((uint32_t)hipBlockIdx_x, (uint32_t)hipBlockIdx_y,
- (uint32_t)hipBlockIdx_z));
-}
-
-__CG_STATIC_QUALIFIER__ dim3 thread_index() {
- return (dim3((uint32_t)hipThreadIdx_x, (uint32_t)hipThreadIdx_y,
- (uint32_t)hipThreadIdx_z));
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t size() {
- return((uint32_t)(hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- return ((uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
- (hipThreadIdx_y * hipBlockDim_x) +
- (hipThreadIdx_x)));
-}
-
-__CG_STATIC_QUALIFIER__ bool is_valid() {
- //TODO(mahesha) any functionality need to be added here? I believe not
- return true;
-}
-
-__CG_STATIC_QUALIFIER__ void sync() {
- __syncthreads();
-}
-
-} // namespace workgroup
-
-} // namespace internal
-
-} // namespace cooperative_groups
-
-#endif // __cplusplus
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_fp16.h b/third_party/rocm/include/hip/hcc_detail/hip_fp16.h
deleted file mode 100644
index af004a8..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_fp16.h
+++ /dev/null
@@ -1,1658 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
-
-#include <hip/hcc_detail/hip_common.h>
-
-#include "hip/hcc_detail/host_defines.h"
-#include <assert.h>
-#if defined(__cplusplus)
- #include <algorithm>
- #include <type_traits>
- #include <utility>
-#endif
-
-#if __HCC_OR_HIP_CLANG__
- typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
-
- struct __half_raw {
- union {
- static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
-
- _Float16 data;
- unsigned short x;
- };
- };
-
- struct __half2_raw {
- union {
- static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
-
- _Float16_2 data;
- struct {
- unsigned short x;
- unsigned short y;
- };
- };
- };
-
- #if defined(__cplusplus)
- #include "hip_fp16_math_fwd.h"
- #include "hip_vector_types.h"
- #include "host_defines.h"
-
- namespace std
- {
- template<> struct is_floating_point<_Float16> : std::true_type {};
- }
-
- template<bool cond, typename T = void>
- using Enable_if_t = typename std::enable_if<cond, T>::type;
-
- // BEGIN STRUCT __HALF
- struct __half {
- protected:
- union {
- static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
-
- _Float16 data;
- unsigned short __x;
- };
- public:
- // CREATORS
- __host__ __device__
- __half() = default;
- __host__ __device__
- __half(const __half_raw& x) : data{x.data} {}
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- __host__ __device__
- __half(decltype(data) x) : data{x} {}
- template<
- typename T,
- Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
- __host__ __device__
- __half(T x) : data{static_cast<_Float16>(x)} {}
- #endif
- __host__ __device__
- __half(const __half&) = default;
- __host__ __device__
- __half(__half&&) = default;
- __host__ __device__
- ~__half() = default;
-
- // CREATORS - DEVICE ONLY
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- template<
- typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- __half(T x) : data{static_cast<_Float16>(x)} {}
- #endif
-
- // MANIPULATORS
- __host__ __device__
- __half& operator=(const __half&) = default;
- __host__ __device__
- __half& operator=(__half&&) = default;
- __host__ __device__
- __half& operator=(const __half_raw& x)
- {
- data = x.data;
- return *this;
- }
- __host__ __device__
- volatile __half& operator=(const __half_raw& x) volatile
- {
- data = x.data;
- return *this;
- }
- volatile __half& operator=(const volatile __half_raw& x) volatile
- {
- data = x.data;
- return *this;
- }
- __half& operator=(__half_raw&& x)
- {
- data = x.data;
- return *this;
- }
- volatile __half& operator=(__half_raw&& x) volatile
- {
- data = x.data;
- return *this;
- }
- volatile __half& operator=(volatile __half_raw&& x) volatile
- {
- data = x.data;
- return *this;
- }
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- template<
- typename T,
- Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
- __host__ __device__
- __half& operator=(T x)
- {
- data = static_cast<_Float16>(x);
- return *this;
- }
- #endif
-
- // MANIPULATORS - DEVICE ONLY
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- template<
- typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
- __device__
- __half& operator=(T x)
- {
- data = static_cast<_Float16>(x);
- return *this;
- }
- #endif
-
- #if !defined(__HIP_NO_HALF_OPERATORS__)
- __device__
- __half& operator+=(const __half& x)
- {
- data += x.data;
- return *this;
- }
- __device__
- __half& operator-=(const __half& x)
- {
- data -= x.data;
- return *this;
- }
- __device__
- __half& operator*=(const __half& x)
- {
- data *= x.data;
- return *this;
- }
- __device__
- __half& operator/=(const __half& x)
- {
- data /= x.data;
- return *this;
- }
- __device__
- __half& operator++() { ++data; return *this; }
- __device__
- __half operator++(int)
- {
- __half tmp{*this};
- ++*this;
- return tmp;
- }
- __device__
- __half& operator--() { --data; return *this; }
- __device__
- __half operator--(int)
- {
- __half tmp{*this};
- --*this;
- return tmp;
- }
- #endif
-
- // ACCESSORS
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- template<
- typename T,
- Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
- __host__ __device__
- operator T() const { return data; }
- #endif
- __host__ __device__
- operator __half_raw() const { return __half_raw{data}; }
- __host__ __device__
- operator __half_raw() const volatile
- {
- return __half_raw{data};
- }
-
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- template<
- typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- operator T() const { return data; }
- #endif
-
- #if !defined(__HIP_NO_HALF_OPERATORS__)
- __device__
- __half operator+() const { return *this; }
- __device__
- __half operator-() const
- {
- __half tmp{*this};
- tmp.data = -tmp.data;
- return tmp;
- }
- #endif
-
- // FRIENDS
- #if !defined(__HIP_NO_HALF_OPERATORS__)
- friend
- inline
- __device__
- __half operator+(const __half& x, const __half& y)
- {
- return __half{x} += y;
- }
- friend
- inline
- __device__
- __half operator-(const __half& x, const __half& y)
- {
- return __half{x} -= y;
- }
- friend
- inline
- __device__
- __half operator*(const __half& x, const __half& y)
- {
- return __half{x} *= y;
- }
- friend
- inline
- __device__
- __half operator/(const __half& x, const __half& y)
- {
- return __half{x} /= y;
- }
- friend
- inline
- __device__
- bool operator==(const __half& x, const __half& y)
- {
- return x.data == y.data;
- }
- friend
- inline
- __device__
- bool operator!=(const __half& x, const __half& y)
- {
- return !(x == y);
- }
- friend
- inline
- __device__
- bool operator<(const __half& x, const __half& y)
- {
- return x.data < y.data;
- }
- friend
- inline
- __device__
- bool operator>(const __half& x, const __half& y)
- {
- return y.data < x.data;
- }
- friend
- inline
- __device__
- bool operator<=(const __half& x, const __half& y)
- {
- return !(y < x);
- }
- friend
- inline
- __device__
- bool operator>=(const __half& x, const __half& y)
- {
- return !(x < y);
- }
- #endif // !defined(__HIP_NO_HALF_OPERATORS__)
- };
- // END STRUCT __HALF
-
- // BEGIN STRUCT __HALF2
- struct __half2 {
- protected:
- union {
- static_assert(
- sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
-
- _Float16_2 data;
- struct {
- unsigned short x;
- unsigned short y;
- };
- };
- public:
- // CREATORS
- __host__ __device__
- __half2() = default;
- __host__ __device__
- __half2(const __half2_raw& x) : data{x.data} {}
- __host__ __device__
- __half2(decltype(data) x) : data{x} {}
- __host__ __device__
- __half2(const __half& x, const __half& y)
- :
- data{
- static_cast<__half_raw>(x).data,
- static_cast<__half_raw>(y).data}
- {}
- __host__ __device__
- __half2(const __half2&) = default;
- __host__ __device__
- __half2(__half2&&) = default;
- __host__ __device__
- ~__half2() = default;
-
- // MANIPULATORS
- __host__ __device__
- __half2& operator=(const __half2&) = default;
- __host__ __device__
- __half2& operator=(__half2&&) = default;
- __host__ __device__
- __half2& operator=(const __half2_raw& x)
- {
- data = x.data;
- return *this;
- }
-
- // MANIPULATORS - DEVICE ONLY
- #if !defined(__HIP_NO_HALF_OPERATORS__)
- __device__
- __half2& operator+=(const __half2& x)
- {
- data += x.data;
- return *this;
- }
- __device__
- __half2& operator-=(const __half2& x)
- {
- data -= x.data;
- return *this;
- }
- __device__
- __half2& operator*=(const __half2& x)
- {
- data *= x.data;
- return *this;
- }
- __device__
- __half2& operator/=(const __half2& x)
- {
- data /= x.data;
- return *this;
- }
- __device__
- __half2& operator++() { return *this += _Float16_2{1, 1}; }
- __device__
- __half2 operator++(int)
- {
- __half2 tmp{*this};
- ++*this;
- return tmp;
- }
- __device__
- __half2& operator--() { return *this -= _Float16_2{1, 1}; }
- __device__
- __half2 operator--(int)
- {
- __half2 tmp{*this};
- --*this;
- return tmp;
- }
- #endif
-
- // ACCESSORS
- __host__ __device__
- operator decltype(data)() const { return data; }
- __host__ __device__
- operator __half2_raw() const { return __half2_raw{data}; }
-
- // ACCESSORS - DEVICE ONLY
- #if !defined(__HIP_NO_HALF_OPERATORS__)
- __device__
- __half2 operator+() const { return *this; }
- __device__
- __half2 operator-() const
- {
- __half2 tmp{*this};
- tmp.data = -tmp.data;
- return tmp;
- }
- #endif
-
- // FRIENDS
- #if !defined(__HIP_NO_HALF_OPERATORS__)
- friend
- inline
- __device__
- __half2 operator+(const __half2& x, const __half2& y)
- {
- return __half2{x} += y;
- }
- friend
- inline
- __device__
- __half2 operator-(const __half2& x, const __half2& y)
- {
- return __half2{x} -= y;
- }
- friend
- inline
- __device__
- __half2 operator*(const __half2& x, const __half2& y)
- {
- return __half2{x} *= y;
- }
- friend
- inline
- __device__
- __half2 operator/(const __half2& x, const __half2& y)
- {
- return __half2{x} /= y;
- }
- friend
- inline
- __device__
- bool operator==(const __half2& x, const __half2& y)
- {
- auto r = x.data == y.data;
- return r.x != 0 && r.y != 0;
- }
- friend
- inline
- __device__
- bool operator!=(const __half2& x, const __half2& y)
- {
- return !(x == y);
- }
- friend
- inline
- __device__
- bool operator<(const __half2& x, const __half2& y)
- {
- auto r = x.data < y.data;
- return r.x != 0 && r.y != 0;
- }
- friend
- inline
- __device__
- bool operator>(const __half2& x, const __half2& y)
- {
- return y < x;
- }
- friend
- inline
- __device__
- bool operator<=(const __half2& x, const __half2& y)
- {
- return !(y < x);
- }
- friend
- inline
- __device__
- bool operator>=(const __half2& x, const __half2& y)
- {
- return !(x < y);
- }
- #endif // !defined(__HIP_NO_HALF_OPERATORS__)
- };
- // END STRUCT __HALF2
-
- namespace
- {
- inline
- __host__ __device__
- __half2 make_half2(__half x, __half y)
- {
- return __half2{x, y};
- }
-
- inline
- __host__ __device__
- __half __low2half(__half2 x)
- {
- return __half{__half_raw{static_cast<__half2_raw>(x).data.x}};
- }
-
- inline
- __host__ __device__
- __half __high2half(__half2 x)
- {
- return __half{__half_raw{static_cast<__half2_raw>(x).data.y}};
- }
-
- inline
- __host__ __device__
- __half2 __half2half2(__half x)
- {
- return __half2{x, x};
- }
-
- inline
- __host__ __device__
- __half2 __halves2half2(__half x, __half y)
- {
- return __half2{x, y};
- }
-
- inline
- __host__ __device__
- __half2 __low2half2(__half2 x)
- {
- return __half2{
- _Float16_2{
- static_cast<__half2_raw>(x).data.x,
- static_cast<__half2_raw>(x).data.x}};
- }
-
- inline
- __host__ __device__
- __half2 __high2half2(__half2 x)
- {
- return __half2_raw{
- _Float16_2{
- static_cast<__half2_raw>(x).data.y,
- static_cast<__half2_raw>(x).data.y}};
- }
-
- inline
- __host__ __device__
- __half2 __lows2half2(__half2 x, __half2 y)
- {
- return __half2_raw{
- _Float16_2{
- static_cast<__half2_raw>(x).data.x,
- static_cast<__half2_raw>(y).data.x}};
- }
-
- inline
- __host__ __device__
- __half2 __highs2half2(__half2 x, __half2 y)
- {
- return __half2_raw{
- _Float16_2{
- static_cast<__half2_raw>(x).data.y,
- static_cast<__half2_raw>(y).data.y}};
- }
-
- inline
- __host__ __device__
- __half2 __lowhigh2highlow(__half2 x)
- {
- return __half2_raw{
- _Float16_2{
- static_cast<__half2_raw>(x).data.y,
- static_cast<__half2_raw>(x).data.x}};
- }
-
- // Bitcasts
- inline
- __device__
- short __half_as_short(__half x)
- {
- return static_cast<__half_raw>(x).x;
- }
-
- inline
- __device__
- unsigned short __half_as_ushort(__half x)
- {
- return static_cast<__half_raw>(x).x;
- }
-
- inline
- __device__
- __half __short_as_half(short x)
- {
- __half_raw r; r.x = x;
- return r;
- }
-
- inline
- __device__
- __half __ushort_as_half(unsigned short x)
- {
- __half_raw r; r.x = x;
- return r;
- }
-
- // TODO: rounding behaviour is not correct.
- // float -> half | half2
- inline
- __device__ __host__
- __half __float2half(float x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__ __host__
- __half __float2half_rn(float x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__ __host__
- __half __float2half_rz(float x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__ __host__
- __half __float2half_rd(float x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__ __host__
- __half __float2half_ru(float x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__ __host__
- __half2 __float2half2_rn(float x)
- {
- return __half2_raw{
- _Float16_2{
- static_cast<_Float16>(x), static_cast<_Float16>(x)}};
- }
- inline
- __device__ __host__
- __half2 __floats2half2_rn(float x, float y)
- {
- return __half2_raw{_Float16_2{
- static_cast<_Float16>(x), static_cast<_Float16>(y)}};
- }
- inline
- __device__ __host__
- __half2 __float22half2_rn(float2 x)
- {
- return __floats2half2_rn(x.x, x.y);
- }
-
- // half | half2 -> float
- inline
- __device__ __host__
- float __half2float(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__ __host__
- float __low2float(__half2 x)
- {
- return static_cast<__half2_raw>(x).data.x;
- }
- inline
- __device__ __host__
- float __high2float(__half2 x)
- {
- return static_cast<__half2_raw>(x).data.y;
- }
- inline
- __device__ __host__
- float2 __half22float2(__half2 x)
- {
- return make_float2(
- static_cast<__half2_raw>(x).data.x,
- static_cast<__half2_raw>(x).data.y);
- }
-
- // half -> int
- inline
- __device__
- int __half2int_rn(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- int __half2int_rz(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- int __half2int_rd(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- int __half2int_ru(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
-
- // int -> half
- inline
- __device__
- __half __int2half_rn(int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __int2half_rz(int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __int2half_rd(int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __int2half_ru(int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
-
- // half -> short
- inline
- __device__
- short __half2short_rn(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- short __half2short_rz(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- short __half2short_rd(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- short __half2short_ru(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
-
- // short -> half
- inline
- __device__
- __half __short2half_rn(short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __short2half_rz(short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __short2half_rd(short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __short2half_ru(short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
-
- // half -> long long
- inline
- __device__
- long long __half2ll_rn(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- long long __half2ll_rz(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- long long __half2ll_rd(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- long long __half2ll_ru(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
-
- // long long -> half
- inline
- __device__
- __half __ll2half_rn(long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ll2half_rz(long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ll2half_rd(long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ll2half_ru(long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
-
- // half -> unsigned int
- inline
- __device__
- unsigned int __half2uint_rn(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned int __half2uint_rz(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned int __half2uint_rd(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned int __half2uint_ru(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
-
- // unsigned int -> half
- inline
- __device__
- __half __uint2half_rn(unsigned int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __uint2half_rz(unsigned int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __uint2half_rd(unsigned int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __uint2half_ru(unsigned int x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
-
- // half -> unsigned short
- inline
- __device__
- unsigned short __half2ushort_rn(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned short __half2ushort_rz(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned short __half2ushort_rd(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned short __half2ushort_ru(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
-
- // unsigned short -> half
- inline
- __device__
- __half __ushort2half_rn(unsigned short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ushort2half_rz(unsigned short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ushort2half_rd(unsigned short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ushort2half_ru(unsigned short x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
-
- // half -> unsigned long long
- inline
- __device__
- unsigned long long __half2ull_rn(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned long long __half2ull_rz(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned long long __half2ull_rd(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
- inline
- __device__
- unsigned long long __half2ull_ru(__half x)
- {
- return static_cast<__half_raw>(x).data;
- }
-
- // unsigned long long -> half
- inline
- __device__
- __half __ull2half_rn(unsigned long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ull2half_rz(unsigned long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ull2half_rd(unsigned long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
- inline
- __device__
- __half __ull2half_ru(unsigned long long x)
- {
- return __half_raw{static_cast<_Float16>(x)};
- }
-
- // Load primitives
- inline
- __device__
- __half __ldg(const __half* ptr) { return *ptr; }
- inline
- __device__
- __half __ldcg(const __half* ptr) { return *ptr; }
- inline
- __device__
- __half __ldca(const __half* ptr) { return *ptr; }
- inline
- __device__
- __half __ldcs(const __half* ptr) { return *ptr; }
-
- inline
- __host__ __device__
- __half2 __ldg(const __half2* ptr) { return *ptr; }
- inline
- __host__ __device__
- __half2 __ldcg(const __half2* ptr) { return *ptr; }
- inline
- __host__ __device__
- __half2 __ldca(const __half2* ptr) { return *ptr; }
- inline
- __host__ __device__
- __half2 __ldcs(const __half2* ptr) { return *ptr; }
-
- // Relations
- inline
- __device__
- bool __heq(__half x, __half y)
- {
- return static_cast<__half_raw>(x).data ==
- static_cast<__half_raw>(y).data;
- }
- inline
- __device__
- bool __hne(__half x, __half y)
- {
- return static_cast<__half_raw>(x).data !=
- static_cast<__half_raw>(y).data;
- }
- inline
- __device__
- bool __hle(__half x, __half y)
- {
- return static_cast<__half_raw>(x).data <=
- static_cast<__half_raw>(y).data;
- }
- inline
- __device__
- bool __hge(__half x, __half y)
- {
- return static_cast<__half_raw>(x).data >=
- static_cast<__half_raw>(y).data;
- }
- inline
- __device__
- bool __hlt(__half x, __half y)
- {
- return static_cast<__half_raw>(x).data <
- static_cast<__half_raw>(y).data;
- }
- inline
- __device__
- bool __hgt(__half x, __half y)
- {
- return static_cast<__half_raw>(x).data >
- static_cast<__half_raw>(y).data;
- }
- inline
- __device__
- bool __hequ(__half x, __half y) { return __heq(x, y); }
- inline
- __device__
- bool __hneu(__half x, __half y) { return __hne(x, y); }
- inline
- __device__
- bool __hleu(__half x, __half y) { return __hle(x, y); }
- inline
- __device__
- bool __hgeu(__half x, __half y) { return __hge(x, y); }
- inline
- __device__
- bool __hltu(__half x, __half y) { return __hlt(x, y); }
- inline
- __device__
- bool __hgtu(__half x, __half y) { return __hgt(x, y); }
-
- inline
- __host__ __device__
- __half2 __heq2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(x).data ==
- static_cast<__half2_raw>(y).data;
- return __builtin_convertvector(-r, _Float16_2);
- }
- inline
- __host__ __device__
- __half2 __hne2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(x).data !=
- static_cast<__half2_raw>(y).data;
- return __builtin_convertvector(-r, _Float16_2);
- }
- inline
- __host__ __device__
- __half2 __hle2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(x).data <=
- static_cast<__half2_raw>(y).data;
- return __builtin_convertvector(-r, _Float16_2);
- }
- inline
- __host__ __device__
- __half2 __hge2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(x).data >=
- static_cast<__half2_raw>(y).data;
- return __builtin_convertvector(-r, _Float16_2);
- }
- inline
- __host__ __device__
- __half2 __hlt2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(x).data <
- static_cast<__half2_raw>(y).data;
- return __builtin_convertvector(-r, _Float16_2);
- }
- inline
- __host__ __device__
- __half2 __hgt2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(x).data >
- static_cast<__half2_raw>(y).data;
- return __builtin_convertvector(-r, _Float16_2);
- }
- inline
- __host__ __device__
- __half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); }
- inline
- __host__ __device__
- __half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); }
- inline
- __host__ __device__
- __half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); }
- inline
- __host__ __device__
- __half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); }
- inline
- __host__ __device__
- __half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); }
- inline
- __host__ __device__
- __half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); }
-
- inline
- __host__ __device__
- bool __hbeq2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__heq2(x, y));
- return r.data.x != 0 && r.data.y != 0;
- }
- inline
- __host__ __device__
- bool __hbne2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hne2(x, y));
- return r.data.x != 0 && r.data.y != 0;
- }
- inline
- __host__ __device__
- bool __hble2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hle2(x, y));
- return r.data.x != 0 && r.data.y != 0;
- }
- inline
- __host__ __device__
- bool __hbge2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hge2(x, y));
- return r.data.x != 0 && r.data.y != 0;
- }
- inline
- __host__ __device__
- bool __hblt2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hlt2(x, y));
- return r.data.x != 0 && r.data.y != 0;
- }
- inline
- __host__ __device__
- bool __hbgt2(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hgt2(x, y));
- return r.data.x != 0 && r.data.y != 0;
- }
- inline
- __host__ __device__
- bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); }
- inline
- __host__ __device__
- bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); }
- inline
- __host__ __device__
- bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); }
- inline
- __host__ __device__
- bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); }
- inline
- __host__ __device__
- bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); }
- inline
- __host__ __device__
- bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); }
-
- // Arithmetic
- inline
- __device__
- __half __clamp_01(__half x)
- {
- auto r = static_cast<__half_raw>(x);
-
- if (__hlt(x, __half_raw{0})) return __half_raw{0};
- if (__hlt(__half_raw{1}, x)) return __half_raw{1};
- return r;
- }
-
- inline
- __device__
- __half __hadd(__half x, __half y)
- {
- return __half_raw{
- static_cast<__half_raw>(x).data +
- static_cast<__half_raw>(y).data};
- }
- inline
- __device__
- __half __habs(__half x)
- {
- return __half_raw{
- __ocml_fabs_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half __hsub(__half x, __half y)
- {
- return __half_raw{
- static_cast<__half_raw>(x).data -
- static_cast<__half_raw>(y).data};
- }
- inline
- __device__
- __half __hmul(__half x, __half y)
- {
- return __half_raw{
- static_cast<__half_raw>(x).data *
- static_cast<__half_raw>(y).data};
- }
- inline
- __device__
- __half __hadd_sat(__half x, __half y)
- {
- return __clamp_01(__hadd(x, y));
- }
- inline
- __device__
- __half __hsub_sat(__half x, __half y)
- {
- return __clamp_01(__hsub(x, y));
- }
- inline
- __device__
- __half __hmul_sat(__half x, __half y)
- {
- return __clamp_01(__hmul(x, y));
- }
- inline
- __device__
- __half __hfma(__half x, __half y, __half z)
- {
- return __half_raw{__ocml_fma_f16(
- static_cast<__half_raw>(x).data,
- static_cast<__half_raw>(y).data,
- static_cast<__half_raw>(z).data)};
- }
- inline
- __device__
- __half __hfma_sat(__half x, __half y, __half z)
- {
- return __clamp_01(__hfma(x, y, z));
- }
- inline
- __device__
- __half __hdiv(__half x, __half y)
- {
- return __half_raw{
- static_cast<__half_raw>(x).data /
- static_cast<__half_raw>(y).data};
- }
-
- inline
- __host__ __device__
- __half2 __hadd2(__half2 x, __half2 y)
- {
- return __half2_raw{
- static_cast<__half2_raw>(x).data +
- static_cast<__half2_raw>(y).data};
- }
- inline
- __host__ __device__
- __half2 __habs2(__half2 x)
- {
- return __half2_raw{
- __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)};
- }
- inline
- __host__ __device__
- __half2 __hsub2(__half2 x, __half2 y)
- {
- return __half2_raw{
- static_cast<__half2_raw>(x).data -
- static_cast<__half2_raw>(y).data};
- }
- inline
- __host__ __device__
- __half2 __hmul2(__half2 x, __half2 y)
- {
- return __half2_raw{
- static_cast<__half2_raw>(x).data *
- static_cast<__half2_raw>(y).data};
- }
- inline
- __host__ __device__
- __half2 __hadd2_sat(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hadd2(x, y));
- return __half2{
- __clamp_01(__half_raw{r.data.x}),
- __clamp_01(__half_raw{r.data.y})};
- }
- inline
- __host__ __device__
- __half2 __hsub2_sat(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hsub2(x, y));
- return __half2{
- __clamp_01(__half_raw{r.data.x}),
- __clamp_01(__half_raw{r.data.y})};
- }
- inline
- __host__ __device__
- __half2 __hmul2_sat(__half2 x, __half2 y)
- {
- auto r = static_cast<__half2_raw>(__hmul2(x, y));
- return __half2{
- __clamp_01(__half_raw{r.data.x}),
- __clamp_01(__half_raw{r.data.y})};
- }
- inline
- __host__ __device__
- __half2 __hfma2(__half2 x, __half2 y, __half2 z)
- {
- return __half2_raw{__ocml_fma_2f16(x, y, z)};
- }
- inline
- __host__ __device__
- __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
- {
- auto r = static_cast<__half2_raw>(__hfma2(x, y, z));
- return __half2{
- __clamp_01(__half_raw{r.data.x}),
- __clamp_01(__half_raw{r.data.y})};
- }
- inline
- __host__ __device__
- __half2 __h2div(__half2 x, __half2 y)
- {
- return __half2_raw{
- static_cast<__half2_raw>(x).data /
- static_cast<__half2_raw>(y).data};
- }
-
- // Math functions
- #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
- inline
- __device__
- float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) {
- return __ockl_fdot2(static_cast<__half2_raw>(a).data,
- static_cast<__half2_raw>(b).data,
- c, saturate);
- }
- #endif
- inline
- __device__
- __half htrunc(__half x)
- {
- return __half_raw{
- __ocml_trunc_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hceil(__half x)
- {
- return __half_raw{
- __ocml_ceil_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hfloor(__half x)
- {
- return __half_raw{
- __ocml_floor_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hrint(__half x)
- {
- return __half_raw{
- __ocml_rint_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hsin(__half x)
- {
- return __half_raw{
- __ocml_sin_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hcos(__half x)
- {
- return __half_raw{
- __ocml_cos_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hexp(__half x)
- {
- return __half_raw{
- __ocml_exp_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hexp2(__half x)
- {
- return __half_raw{
- __ocml_exp2_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hexp10(__half x)
- {
- return __half_raw{
- __ocml_exp10_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hlog2(__half x)
- {
- return __half_raw{
- __ocml_log2_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hlog(__half x)
- {
- return __half_raw{
- __ocml_log_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hlog10(__half x)
- {
- return __half_raw{
- __ocml_log10_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hrcp(__half x)
- {
- return __half_raw{
- __llvm_amdgcn_rcp_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hrsqrt(__half x)
- {
- return __half_raw{
- __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- __half hsqrt(__half x)
- {
- return __half_raw{
- __ocml_sqrt_f16(static_cast<__half_raw>(x).data)};
- }
- inline
- __device__
- bool __hisinf(__half x)
- {
- return __ocml_isinf_f16(static_cast<__half_raw>(x).data);
- }
- inline
- __device__
- bool __hisnan(__half x)
- {
- return __ocml_isnan_f16(static_cast<__half_raw>(x).data);
- }
- inline
- __device__
- __half __hneg(__half x)
- {
- return __half_raw{-static_cast<__half_raw>(x).data};
- }
-
- inline
- __host__ __device__
- __half2 h2trunc(__half2 x)
- {
- return __half2_raw{__ocml_trunc_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2ceil(__half2 x)
- {
- return __half2_raw{__ocml_ceil_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2floor(__half2 x)
- {
- return __half2_raw{__ocml_floor_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2rint(__half2 x)
- {
- return __half2_raw{__ocml_rint_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2sin(__half2 x)
- {
- return __half2_raw{__ocml_sin_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2cos(__half2 x)
- {
- return __half2_raw{__ocml_cos_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2exp(__half2 x)
- {
- return __half2_raw{__ocml_exp_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2exp2(__half2 x)
- {
- return __half2_raw{__ocml_exp2_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2exp10(__half2 x)
- {
- return __half2_raw{__ocml_exp10_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2log2(__half2 x)
- {
- return __half2_raw{__ocml_log2_2f16(x)};
- }
- inline
- __host__ __device__
- __half2 h2log(__half2 x) { return __ocml_log_2f16(x); }
- inline
- __host__ __device__
- __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); }
- inline
- __host__ __device__
- __half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); }
- inline
- __host__ __device__
- __half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); }
- inline
- __host__ __device__
- __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); }
- inline
- __host__ __device__
- __half2 __hisinf2(__half2 x)
- {
- auto r = __ocml_isinf_2f16(x);
- return __half2_raw{_Float16_2{
- static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
- }
- inline
- __host__ __device__
- __half2 __hisnan2(__half2 x)
- {
- auto r = __ocml_isnan_2f16(x);
- return __half2_raw{_Float16_2{
- static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
- }
- inline
- __host__ __device__
- __half2 __hneg2(__half2 x)
- {
- return __half2_raw{-static_cast<__half2_raw>(x).data};
- }
- } // Anonymous namespace.
-
- #if !defined(HIP_NO_HALF)
- using half = __half;
- using half2 = __half2;
- #endif
- #endif // defined(__cplusplus)
-#elif defined(__GNUC__)
- #include "hip_fp16_gcc.h"
-#endif // !defined(__clang__) && defined(__GNUC__)
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_fp16_gcc.h b/third_party/rocm/include/hip/hcc_detail/hip_fp16_gcc.h
deleted file mode 100644
index 480fd81..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_fp16_gcc.h
+++ /dev/null
@@ -1,254 +0,0 @@
-#pragma once
-
-#if defined(__cplusplus)
- #include <cstring>
-#endif
-
-struct __half_raw {
- unsigned short x;
-};
-
-struct __half2_raw {
- unsigned short x;
- unsigned short y;
-};
-
-#if defined(__cplusplus)
- struct __half;
-
- __half __float2half(float);
- float __half2float(__half);
-
- // BEGIN STRUCT __HALF
- struct __half {
- protected:
- unsigned short __x;
- public:
- // CREATORS
- __half() = default;
- __half(const __half_raw& x) : __x{x.x} {}
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- __half(float x) : __x{__float2half(x).__x} {}
- __half(double x) : __x{__float2half(x).__x} {}
- #endif
- __half(const __half&) = default;
- __half(__half&&) = default;
- ~__half() = default;
-
- // MANIPULATORS
- __half& operator=(const __half&) = default;
- __half& operator=(__half&&) = default;
- __half& operator=(const __half_raw& x) { __x = x.x; return *this; }
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
- __half& operator=(float x)
- {
- __x = __float2half(x).__x;
- return *this;
- }
- __half& operator=(double x)
- {
- return *this = static_cast<float>(x);
- }
- #endif
-
- // ACCESSORS
- operator float() const { return __half2float(*this); }
- operator __half_raw() const { return __half_raw{__x}; }
- };
- // END STRUCT __HALF
-
- // BEGIN STRUCT __HALF2
- struct __half2 {
- protected:
- __half x;
- __half y;
- public:
- // CREATORS
- __half2() = default;
- __half2(const __half2_raw& ix)
- :
- x{reinterpret_cast<const __half&>(ix.x)},
- y{reinterpret_cast<const __half&>(ix.y)}
- {}
- __half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {}
- __half2(const __half2&) = default;
- __half2(__half2&&) = default;
- ~__half2() = default;
-
- // MANIPULATORS
- __half2& operator=(const __half2&) = default;
- __half2& operator=(__half2&&) = default;
- __half2& operator=(const __half2_raw& ix)
- {
- x = reinterpret_cast<const __half_raw&>(ix.x);
- y = reinterpret_cast<const __half_raw&>(ix.y);
- return *this;
- }
-
- // ACCESSORS
- operator __half2_raw() const
- {
- return __half2_raw{
- reinterpret_cast<const unsigned short&>(x),
- reinterpret_cast<const unsigned short&>(y)};
- }
- };
- // END STRUCT __HALF2
-
- inline
- unsigned short __internal_float2half(
- float flt, unsigned int& sgn, unsigned int& rem)
- {
- unsigned int x{};
- std::memcpy(&x, &flt, sizeof(flt));
-
- unsigned int u = (x & 0x7fffffffU);
- sgn = ((x >> 16) & 0x8000U);
-
- // NaN/+Inf/-Inf
- if (u >= 0x7f800000U) {
- rem = 0;
- return static_cast<unsigned short>(
- (u == 0x7f800000U) ? (sgn | 0x7c00U) : 0x7fffU);
- }
- // Overflows
- if (u > 0x477fefffU) {
- rem = 0x80000000U;
- return static_cast<unsigned short>(sgn | 0x7bffU);
- }
- // Normal numbers
- if (u >= 0x38800000U) {
- rem = u << 19;
- u -= 0x38000000U;
- return static_cast<unsigned short>(sgn | (u >> 13));
- }
- // +0/-0
- if (u < 0x33000001U) {
- rem = u;
- return static_cast<unsigned short>(sgn);
- }
- // Denormal numbers
- unsigned int exponent = u >> 23;
- unsigned int mantissa = (u & 0x7fffffU);
- unsigned int shift = 0x7eU - exponent;
- mantissa |= 0x800000U;
- rem = mantissa << (32 - shift);
- return static_cast<unsigned short>(sgn | (mantissa >> shift));
- }
-
- inline
- __half __float2half(float x)
- {
- __half_raw r;
- unsigned int sgn{};
- unsigned int rem{};
- r.x = __internal_float2half(x, sgn, rem);
- if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x;
-
- return r;
- }
-
- inline
- __half __float2half_rn(float x) { return __float2half(x); }
-
- inline
- __half __float2half_rz(float x)
- {
- __half_raw r;
- unsigned int sgn{};
- unsigned int rem{};
- r.x = __internal_float2half(x, sgn, rem);
-
- return r;
- }
-
- inline
- __half __float2half_rd(float x)
- {
- __half_raw r;
- unsigned int sgn{};
- unsigned int rem{};
- r.x = __internal_float2half(x, sgn, rem);
- if (rem && sgn) ++r.x;
-
- return r;
- }
-
- inline
- __half __float2half_ru(float x)
- {
- __half_raw r;
- unsigned int sgn{};
- unsigned int rem{};
- r.x = __internal_float2half(x, sgn, rem);
- if (rem && !sgn) ++r.x;
-
- return r;
- }
-
- inline
- __half2 __float2half2_rn(float x)
- {
- return __half2{__float2half_rn(x), __float2half_rn(x)};
- }
-
- inline
- __half2 __floats2half2_rn(float x, float y)
- {
- return __half2{__float2half_rn(x), __float2half_rn(y)};
- }
-
- inline
- float __internal_half2float(unsigned short x)
- {
- unsigned int sign = ((x >> 15) & 1);
- unsigned int exponent = ((x >> 10) & 0x1f);
- unsigned int mantissa = ((x & 0x3ff) << 13);
-
- if (exponent == 0x1fU) { /* NaN or Inf */
- mantissa = (mantissa ? (sign = 0, 0x7fffffU) : 0);
- exponent = 0xffU;
- } else if (!exponent) { /* Denorm or Zero */
- if (mantissa) {
- unsigned int msb;
- exponent = 0x71U;
- do {
- msb = (mantissa & 0x400000U);
- mantissa <<= 1; /* normalize */
- --exponent;
- } while (!msb);
- mantissa &= 0x7fffffU; /* 1.mantissa is implicit */
- }
- } else {
- exponent += 0x70U;
- }
- unsigned int u = ((sign << 31) | (exponent << 23) | mantissa);
- float f;
- memcpy(&f, &u, sizeof(u));
-
- return f;
- }
-
- inline
- float __half2float(__half x)
- {
- return __internal_half2float(static_cast<__half_raw>(x).x);
- }
-
- inline
- float __low2float(__half2 x)
- {
- return __internal_half2float(static_cast<__half2_raw>(x).x);
- }
-
- inline
- float __high2float(__half2 x)
- {
- return __internal_half2float(static_cast<__half2_raw>(x).y);
- }
-
- #if !defined(HIP_NO_HALF)
- using half = __half;
- using half2 = __half2;
- #endif
-#endif // defined(__cplusplus)
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_fp16_math_fwd.h b/third_party/rocm/include/hip/hcc_detail/hip_fp16_math_fwd.h
deleted file mode 100644
index 53a2c66..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_fp16_math_fwd.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-// /*
-// Half Math Functions
-// */
-
-#include "host_defines.h"
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-extern "C"
-{
- __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
- __device__ _Float16 __ocml_cos_f16(_Float16);
- __device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
- __device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
- __device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
- __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
- __device__ __attribute__((const))
- _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16);
- __device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16);
- __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16);
- __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16);
- __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
- __device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
- __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
- __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int);
- __device__ __attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
- __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
- __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
- __device__ _Float16 __ocml_sin_f16(_Float16);
- __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
- __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
-
- typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
- typedef short __2i16 __attribute__((ext_vector_type(2)));
-
- #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
- __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s);
- #endif
-
- __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
- __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16);
- __device__ __2f16 __ocml_cos_2f16(__2f16);
- __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
- __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
- __device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
- __device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
- __device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
- __device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
- __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
- __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
- __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
- __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
- __device__ inline
- __2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL.
- {
- return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)};
- }
- __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
- __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
- __device__ __2f16 __ocml_sin_2f16(__2f16);
- __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
- __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
-}
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_ldg.h b/third_party/rocm/include/hip/hcc_detail/hip_ldg.h
deleted file mode 100644
index ab86955..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_ldg.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H
-
-#if defined(__HCC_OR_HIP_CLANG__)
-#if __hcc_workweek__ >= 16164 || __HIP_CLANG_ONLY__
-#include "hip_vector_types.h"
-#include "host_defines.h"
-
-__device__ inline static char __ldg(const char* ptr) { return *ptr; }
-
-__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; }
-
-__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; }
-
-__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; }
-
-
-__device__ inline static short __ldg(const short* ptr) { return ptr[0]; }
-
-__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; }
-
-__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; }
-
-
-__device__ inline static int __ldg(const int* ptr) { return ptr[0]; }
-
-__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; }
-
-__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; }
-
-
-__device__ inline static long __ldg(const long* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; }
-
-
-__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; }
-
-__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; }
-
-
-__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; }
-
-__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; }
-
-
-__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; }
-
-
-__device__ inline static uint2 __ldg(const uint2* ptr) { return ptr[0]; }
-
-__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; }
-
-
-__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; }
-
-
-__device__ inline static float __ldg(const float* ptr) { return ptr[0]; }
-
-__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; }
-
-__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; }
-
-
-__device__ inline static double __ldg(const double* ptr) { return ptr[0]; }
-
-__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; }
-
-#endif // __hcc_workweek__ || __HIP_CLANG_ONLY__
-
-#endif // defined(__HCC_OR_HIP_CLANG__)
-
-#endif // HIP_LDG_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_memory.h b/third_party/rocm/include/hip/hcc_detail/hip_memory.h
deleted file mode 100644
index 0c00614..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_memory.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
-
-// Implementation of malloc and free device functions.
-// HIP heap is implemented as a global array with fixed size. Users may define
-// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap.
-
-#if (__HCC__ || __HIP__) && __HIP_ENABLE_DEVICE_MALLOC__
-
-// Size of page in bytes.
-#ifndef __HIP_SIZE_OF_PAGE
-#define __HIP_SIZE_OF_PAGE 64
-#endif
-
-// Total number of pages
-#ifndef __HIP_NUM_PAGES
-#define __HIP_NUM_PAGES (16 * 64 * 64)
-#endif
-
-#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE)
-
-#if __HIP__ && __HIP_DEVICE_COMPILE__
-__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
-__attribute__((weak)) __device__
- uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
-#else
-extern __device__ char __hip_device_heap[];
-extern __device__ uint32_t __hip_device_page_flag[];
-#endif
-
-extern "C" inline __device__ void* __hip_malloc(size_t size) {
- char* heap = (char*)__hip_device_heap;
- if (size > __HIP_SIZE_OF_HEAP) {
- return (void*)nullptr;
- }
- uint32_t totalThreads =
- hipBlockDim_x * hipGridDim_x * hipBlockDim_y
- * hipGridDim_y * hipBlockDim_z * hipGridDim_z;
- uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
- + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
- + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
- * hipBlockDim_y;
-
- uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads;
- uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads;
-
- uint32_t stride = size / __HIP_SIZE_OF_PAGE;
- uint32_t start = numHeapsPerWorkItem * currentWorkItem;
-
- uint32_t k = 0;
-
- while (__hip_device_page_flag[k] > 0) {
- k++;
- }
-
- for (uint32_t i = 0; i < stride - 1; i++) {
- __hip_device_page_flag[i + start + k] = 1;
- }
-
- __hip_device_page_flag[start + stride - 1 + k] = 2;
-
- void* ptr = (void*)(heap
- + heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE);
-
- return ptr;
-}
-
-extern "C" inline __device__ void* __hip_free(void* ptr) {
- if (ptr == nullptr) {
- return nullptr;
- }
-
- uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap;
- uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE;
-
- while (__hip_device_page_flag[offsetPage] != 0) {
- if (__hip_device_page_flag[offsetPage] == 2) {
- __hip_device_page_flag[offsetPage] = 0;
- offsetPage++;
- break;
- } else {
- __hip_device_page_flag[offsetPage] = 0;
- offsetPage++;
- }
- }
-
- return nullptr;
-}
-
-#endif
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_prof_str.h b/third_party/rocm/include/hip/hcc_detail/hip_prof_str.h
deleted file mode 100644
index cb297b2..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_prof_str.h
+++ /dev/null
@@ -1,5127 +0,0 @@
-// automatically generated sources
-#ifndef _HIP_PROF_STR_H
-#define _HIP_PROF_STR_H
-#define HIP_PROF_VER 1
-
-// Dummy API primitives
-#define INIT_NONE_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetBorderColor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipArrayGetDescriptor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectGetResourceViewDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyAtoHAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipDestroyTextureObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipArray3DGetDescriptor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetAddress_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipArrayDestroy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMaxAnisotropy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmapFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipDeviceGetCount_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyArrayToArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTexture2D_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipCreateTextureObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyHtoAAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyAtoA_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyAtoD_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTextureToMipmappedArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmapLevelClamp_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTextureToArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetFlags_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetFormat_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectGetTextureDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectDestroy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpy2DArrayToArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMipmappedArrayDestroy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetFormat_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyToArrayAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetAddress2D_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureObjectResourceViewDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetFlags_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipUnbindTexture_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmapLevelBias_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureAlignmentOffset_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMipmappedArrayGetLevel_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipCreateSurfaceObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMipmappedArrayCreate_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectGetResourceDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetChannelDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetAddressMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureObjectResourceDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipModuleLaunchKernelExt_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpy2DToArrayAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetBorderColor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipDestroySurfaceObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmapFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMaxAnisotropy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectCreate_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetAddressMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmapLevelBias_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyFromArrayAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTexture_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmappedArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmappedArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipSetValidDevices_CB_ARGS_DATA(cb_data) {};
-#define INIT_ihipModuleLaunchKernel_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmapLevelClamp_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureObjectTextureDesc_CB_ARGS_DATA(cb_data) {};
-
-// HIP API callbacks ID enumaration
-enum hip_api_id_t {
- HIP_API_ID_hipDrvMemcpy3DAsync = 0,
- HIP_API_ID_hipDeviceEnablePeerAccess = 1,
- HIP_API_ID_hipFuncSetSharedMemConfig = 2,
- HIP_API_ID_hipMemcpyToSymbolAsync = 3,
- HIP_API_ID_hipMallocPitch = 4,
- HIP_API_ID_hipMalloc = 5,
- HIP_API_ID_hipMemsetD16 = 6,
- HIP_API_ID_hipExtStreamGetCUMask = 7,
- HIP_API_ID_hipEventRecord = 8,
- HIP_API_ID_hipCtxSynchronize = 9,
- HIP_API_ID_hipSetDevice = 10,
- HIP_API_ID_hipCtxGetApiVersion = 11,
- HIP_API_ID_hipMemcpyFromSymbolAsync = 12,
- HIP_API_ID_hipExtGetLinkTypeAndHopCount = 13,
- HIP_API_ID___hipPopCallConfiguration = 14,
- HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor = 15,
- HIP_API_ID_hipMemset3D = 16,
- HIP_API_ID_hipStreamCreateWithPriority = 17,
- HIP_API_ID_hipMemcpy2DToArray = 18,
- HIP_API_ID_hipMemsetD8Async = 19,
- HIP_API_ID_hipCtxGetCacheConfig = 20,
- HIP_API_ID_hipModuleGetFunction = 21,
- HIP_API_ID_hipStreamWaitEvent = 22,
- HIP_API_ID_hipDeviceGetStreamPriorityRange = 23,
- HIP_API_ID_hipModuleLoad = 24,
- HIP_API_ID_hipDevicePrimaryCtxSetFlags = 25,
- HIP_API_ID_hipLaunchCooperativeKernel = 26,
- HIP_API_ID_hipLaunchCooperativeKernelMultiDevice = 27,
- HIP_API_ID_hipMemcpyAsync = 28,
- HIP_API_ID_hipMalloc3DArray = 29,
- HIP_API_ID_hipMallocHost = 30,
- HIP_API_ID_hipCtxGetCurrent = 31,
- HIP_API_ID_hipDevicePrimaryCtxGetState = 32,
- HIP_API_ID_hipEventQuery = 33,
- HIP_API_ID_hipEventCreate = 34,
- HIP_API_ID_hipMemGetAddressRange = 35,
- HIP_API_ID_hipMemcpyFromSymbol = 36,
- HIP_API_ID_hipArrayCreate = 37,
- HIP_API_ID_hipStreamAttachMemAsync = 38,
- HIP_API_ID_hipStreamGetFlags = 39,
- HIP_API_ID_hipMallocArray = 40,
- HIP_API_ID_hipCtxGetSharedMemConfig = 41,
- HIP_API_ID_hipDeviceDisablePeerAccess = 42,
- HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize = 43,
- HIP_API_ID_hipMemPtrGetInfo = 44,
- HIP_API_ID_hipFuncGetAttribute = 45,
- HIP_API_ID_hipCtxGetFlags = 46,
- HIP_API_ID_hipStreamDestroy = 47,
- HIP_API_ID___hipPushCallConfiguration = 48,
- HIP_API_ID_hipMemset3DAsync = 49,
- HIP_API_ID_hipDeviceGetPCIBusId = 50,
- HIP_API_ID_hipInit = 51,
- HIP_API_ID_hipMemcpyAtoH = 52,
- HIP_API_ID_hipStreamGetPriority = 53,
- HIP_API_ID_hipMemset2D = 54,
- HIP_API_ID_hipMemset2DAsync = 55,
- HIP_API_ID_hipDeviceCanAccessPeer = 56,
- HIP_API_ID_hipLaunchByPtr = 57,
- HIP_API_ID_hipMemPrefetchAsync = 58,
- HIP_API_ID_hipCtxDestroy = 59,
- HIP_API_ID_hipMemsetD16Async = 60,
- HIP_API_ID_hipModuleUnload = 61,
- HIP_API_ID_hipHostUnregister = 62,
- HIP_API_ID_hipProfilerStop = 63,
- HIP_API_ID_hipExtStreamCreateWithCUMask = 64,
- HIP_API_ID_hipStreamSynchronize = 65,
- HIP_API_ID_hipFreeHost = 66,
- HIP_API_ID_hipDeviceSetCacheConfig = 67,
- HIP_API_ID_hipGetErrorName = 68,
- HIP_API_ID_hipMemcpyHtoD = 69,
- HIP_API_ID_hipModuleGetGlobal = 70,
- HIP_API_ID_hipMemcpyHtoA = 71,
- HIP_API_ID_hipCtxCreate = 72,
- HIP_API_ID_hipMemcpy2D = 73,
- HIP_API_ID_hipIpcCloseMemHandle = 74,
- HIP_API_ID_hipChooseDevice = 75,
- HIP_API_ID_hipDeviceSetSharedMemConfig = 76,
- HIP_API_ID_hipMallocMipmappedArray = 77,
- HIP_API_ID_hipSetupArgument = 78,
- HIP_API_ID_hipIpcGetEventHandle = 79,
- HIP_API_ID_hipFreeArray = 80,
- HIP_API_ID_hipCtxSetCacheConfig = 81,
- HIP_API_ID_hipFuncSetCacheConfig = 82,
- HIP_API_ID_hipLaunchKernel = 83,
- HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 84,
- HIP_API_ID_hipModuleGetTexRef = 85,
- HIP_API_ID_hipFuncSetAttribute = 86,
- HIP_API_ID_hipEventElapsedTime = 87,
- HIP_API_ID_hipConfigureCall = 88,
- HIP_API_ID_hipMemAdvise = 89,
- HIP_API_ID_hipMemcpy3DAsync = 90,
- HIP_API_ID_hipEventDestroy = 91,
- HIP_API_ID_hipCtxPopCurrent = 92,
- HIP_API_ID_hipGetSymbolAddress = 93,
- HIP_API_ID_hipHostGetFlags = 94,
- HIP_API_ID_hipHostMalloc = 95,
- HIP_API_ID_hipCtxSetSharedMemConfig = 96,
- HIP_API_ID_hipFreeMipmappedArray = 97,
- HIP_API_ID_hipMemGetInfo = 98,
- HIP_API_ID_hipDeviceReset = 99,
- HIP_API_ID_hipMemset = 100,
- HIP_API_ID_hipMemsetD8 = 101,
- HIP_API_ID_hipMemcpyParam2DAsync = 102,
- HIP_API_ID_hipHostRegister = 103,
- HIP_API_ID_hipDriverGetVersion = 104,
- HIP_API_ID_hipArray3DCreate = 105,
- HIP_API_ID_hipIpcOpenMemHandle = 106,
- HIP_API_ID_hipGetLastError = 107,
- HIP_API_ID_hipGetDeviceFlags = 108,
- HIP_API_ID_hipDeviceGetSharedMemConfig = 109,
- HIP_API_ID_hipDrvMemcpy3D = 110,
- HIP_API_ID_hipMemcpy2DFromArray = 111,
- HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 112,
- HIP_API_ID_hipSetDeviceFlags = 113,
- HIP_API_ID_hipHccModuleLaunchKernel = 114,
- HIP_API_ID_hipFree = 115,
- HIP_API_ID_hipOccupancyMaxPotentialBlockSize = 116,
- HIP_API_ID_hipDeviceGetAttribute = 117,
- HIP_API_ID_hipDeviceComputeCapability = 118,
- HIP_API_ID_hipCtxDisablePeerAccess = 119,
- HIP_API_ID_hipMallocManaged = 120,
- HIP_API_ID_hipDeviceGetByPCIBusId = 121,
- HIP_API_ID_hipIpcGetMemHandle = 122,
- HIP_API_ID_hipMemcpyHtoDAsync = 123,
- HIP_API_ID_hipCtxGetDevice = 124,
- HIP_API_ID_hipMemcpyDtoD = 125,
- HIP_API_ID_hipModuleLoadData = 126,
- HIP_API_ID_hipDevicePrimaryCtxRelease = 127,
- HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor = 128,
- HIP_API_ID_hipCtxSetCurrent = 129,
- HIP_API_ID_hipGetErrorString = 130,
- HIP_API_ID_hipStreamCreate = 131,
- HIP_API_ID_hipDevicePrimaryCtxRetain = 132,
- HIP_API_ID_hipDeviceGet = 133,
- HIP_API_ID_hipStreamCreateWithFlags = 134,
- HIP_API_ID_hipMemcpyFromArray = 135,
- HIP_API_ID_hipMemcpy2DAsync = 136,
- HIP_API_ID_hipFuncGetAttributes = 137,
- HIP_API_ID_hipGetSymbolSize = 138,
- HIP_API_ID_hipHostFree = 139,
- HIP_API_ID_hipEventCreateWithFlags = 140,
- HIP_API_ID_hipStreamQuery = 141,
- HIP_API_ID_hipMemcpy3D = 142,
- HIP_API_ID_hipMemcpyToSymbol = 143,
- HIP_API_ID_hipMemcpy = 144,
- HIP_API_ID_hipPeekAtLastError = 145,
- HIP_API_ID_hipExtLaunchMultiKernelMultiDevice = 146,
- HIP_API_ID_hipHostAlloc = 147,
- HIP_API_ID_hipStreamAddCallback = 148,
- HIP_API_ID_hipMemcpyToArray = 149,
- HIP_API_ID_hipMemsetD32 = 150,
- HIP_API_ID_hipExtModuleLaunchKernel = 151,
- HIP_API_ID_hipDeviceSynchronize = 152,
- HIP_API_ID_hipDeviceGetCacheConfig = 153,
- HIP_API_ID_hipMalloc3D = 154,
- HIP_API_ID_hipPointerGetAttributes = 155,
- HIP_API_ID_hipMemsetAsync = 156,
- HIP_API_ID_hipDeviceGetName = 157,
- HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags = 158,
- HIP_API_ID_hipCtxPushCurrent = 159,
- HIP_API_ID_hipMemcpyPeer = 160,
- HIP_API_ID_hipEventSynchronize = 161,
- HIP_API_ID_hipMemcpyDtoDAsync = 162,
- HIP_API_ID_hipProfilerStart = 163,
- HIP_API_ID_hipExtMallocWithFlags = 164,
- HIP_API_ID_hipCtxEnablePeerAccess = 165,
- HIP_API_ID_hipMemAllocHost = 166,
- HIP_API_ID_hipMemcpyDtoHAsync = 167,
- HIP_API_ID_hipModuleLaunchKernel = 168,
- HIP_API_ID_hipMemAllocPitch = 169,
- HIP_API_ID_hipExtLaunchKernel = 170,
- HIP_API_ID_hipMemcpy2DFromArrayAsync = 171,
- HIP_API_ID_hipDeviceGetLimit = 172,
- HIP_API_ID_hipModuleLoadDataEx = 173,
- HIP_API_ID_hipRuntimeGetVersion = 174,
- HIP_API_ID_hipMemRangeGetAttribute = 175,
- HIP_API_ID_hipDeviceGetP2PAttribute = 176,
- HIP_API_ID_hipMemcpyPeerAsync = 177,
- HIP_API_ID_hipGetDeviceProperties = 178,
- HIP_API_ID_hipMemcpyDtoH = 179,
- HIP_API_ID_hipMemcpyWithStream = 180,
- HIP_API_ID_hipDeviceTotalMem = 181,
- HIP_API_ID_hipHostGetDevicePointer = 182,
- HIP_API_ID_hipMemRangeGetAttributes = 183,
- HIP_API_ID_hipMemcpyParam2D = 184,
- HIP_API_ID_hipDevicePrimaryCtxReset = 185,
- HIP_API_ID_hipGetMipmappedArrayLevel = 186,
- HIP_API_ID_hipMemsetD32Async = 187,
- HIP_API_ID_hipGetDevice = 188,
- HIP_API_ID_hipGetDeviceCount = 189,
- HIP_API_ID_hipIpcOpenEventHandle = 190,
- HIP_API_ID_NUMBER = 191,
-
- HIP_API_ID_NONE = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetAddress = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetBorderColor = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyDtoA = HIP_API_ID_NUMBER,
- HIP_API_ID_hipArrayGetDescriptor = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexObjectGetResourceViewDesc = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyAtoHAsync = HIP_API_ID_NUMBER,
- HIP_API_ID_hipDestroyTextureObject = HIP_API_ID_NUMBER,
- HIP_API_ID_hipArray3DGetDescriptor = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetAddress = HIP_API_ID_NUMBER,
- HIP_API_ID_hipArrayDestroy = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetMaxAnisotropy = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetMipmapFilterMode = HIP_API_ID_NUMBER,
- HIP_API_ID_hipDeviceGetCount = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyArrayToArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipBindTexture2D = HIP_API_ID_NUMBER,
- HIP_API_ID_hipCreateTextureObject = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyHtoAAsync = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyAtoA = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyAtoD = HIP_API_ID_NUMBER,
- HIP_API_ID_hipBindTextureToMipmappedArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetMipmapLevelClamp = HIP_API_ID_NUMBER,
- HIP_API_ID_hipBindTextureToArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetFlags = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetFormat = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexObjectGetTextureDesc = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexObjectDestroy = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpy2DArrayToArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipGetTextureReference = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMipmappedArrayDestroy = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetFilterMode = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetFormat = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyToArrayAsync = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetAddress2D = HIP_API_ID_NUMBER,
- HIP_API_ID_hipGetTextureObjectResourceViewDesc = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetFlags = HIP_API_ID_NUMBER,
- HIP_API_ID_hipUnbindTexture = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetMipmapLevelBias = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetFilterMode = HIP_API_ID_NUMBER,
- HIP_API_ID_hipGetTextureAlignmentOffset = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMipmappedArrayGetLevel = HIP_API_ID_NUMBER,
- HIP_API_ID_hipCreateSurfaceObject = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMipmappedArrayCreate = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexObjectGetResourceDesc = HIP_API_ID_NUMBER,
- HIP_API_ID_hipGetChannelDesc = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetAddressMode = HIP_API_ID_NUMBER,
- HIP_API_ID_hipGetTextureObjectResourceDesc = HIP_API_ID_NUMBER,
- HIP_API_ID_hipModuleLaunchKernelExt = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpy2DToArrayAsync = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetBorderColor = HIP_API_ID_NUMBER,
- HIP_API_ID_hipDestroySurfaceObject = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetMipmapFilterMode = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetMaxAnisotropy = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexObjectCreate = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetAddressMode = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetMipmapLevelBias = HIP_API_ID_NUMBER,
- HIP_API_ID_hipMemcpyFromArrayAsync = HIP_API_ID_NUMBER,
- HIP_API_ID_hipBindTexture = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetMipmappedArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefGetMipmappedArray = HIP_API_ID_NUMBER,
- HIP_API_ID_hipSetValidDevices = HIP_API_ID_NUMBER,
- HIP_API_ID_ihipModuleLaunchKernel = HIP_API_ID_NUMBER,
- HIP_API_ID_hipTexRefSetMipmapLevelClamp = HIP_API_ID_NUMBER,
- HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NUMBER,
-};
-
-// Return HIP API string by given ID
-static inline const char* hip_api_name(const uint32_t id) {
- switch(id) {
- case HIP_API_ID_hipDrvMemcpy3DAsync: return "hipDrvMemcpy3DAsync";
- case HIP_API_ID_hipDeviceEnablePeerAccess: return "hipDeviceEnablePeerAccess";
- case HIP_API_ID_hipFuncSetSharedMemConfig: return "hipFuncSetSharedMemConfig";
- case HIP_API_ID_hipMemcpyToSymbolAsync: return "hipMemcpyToSymbolAsync";
- case HIP_API_ID_hipMallocPitch: return "hipMallocPitch";
- case HIP_API_ID_hipMalloc: return "hipMalloc";
- case HIP_API_ID_hipMemsetD16: return "hipMemsetD16";
- case HIP_API_ID_hipExtStreamGetCUMask: return "hipExtStreamGetCUMask";
- case HIP_API_ID_hipEventRecord: return "hipEventRecord";
- case HIP_API_ID_hipCtxSynchronize: return "hipCtxSynchronize";
- case HIP_API_ID_hipSetDevice: return "hipSetDevice";
- case HIP_API_ID_hipCtxGetApiVersion: return "hipCtxGetApiVersion";
- case HIP_API_ID_hipMemcpyFromSymbolAsync: return "hipMemcpyFromSymbolAsync";
- case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount";
- case HIP_API_ID___hipPopCallConfiguration: return "__hipPopCallConfiguration";
- case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: return "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor";
- case HIP_API_ID_hipMemset3D: return "hipMemset3D";
- case HIP_API_ID_hipStreamCreateWithPriority: return "hipStreamCreateWithPriority";
- case HIP_API_ID_hipMemcpy2DToArray: return "hipMemcpy2DToArray";
- case HIP_API_ID_hipMemsetD8Async: return "hipMemsetD8Async";
- case HIP_API_ID_hipCtxGetCacheConfig: return "hipCtxGetCacheConfig";
- case HIP_API_ID_hipModuleGetFunction: return "hipModuleGetFunction";
- case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent";
- case HIP_API_ID_hipDeviceGetStreamPriorityRange: return "hipDeviceGetStreamPriorityRange";
- case HIP_API_ID_hipModuleLoad: return "hipModuleLoad";
- case HIP_API_ID_hipDevicePrimaryCtxSetFlags: return "hipDevicePrimaryCtxSetFlags";
- case HIP_API_ID_hipLaunchCooperativeKernel: return "hipLaunchCooperativeKernel";
- case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: return "hipLaunchCooperativeKernelMultiDevice";
- case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync";
- case HIP_API_ID_hipMalloc3DArray: return "hipMalloc3DArray";
- case HIP_API_ID_hipMallocHost: return "hipMallocHost";
- case HIP_API_ID_hipCtxGetCurrent: return "hipCtxGetCurrent";
- case HIP_API_ID_hipDevicePrimaryCtxGetState: return "hipDevicePrimaryCtxGetState";
- case HIP_API_ID_hipEventQuery: return "hipEventQuery";
- case HIP_API_ID_hipEventCreate: return "hipEventCreate";
- case HIP_API_ID_hipMemGetAddressRange: return "hipMemGetAddressRange";
- case HIP_API_ID_hipMemcpyFromSymbol: return "hipMemcpyFromSymbol";
- case HIP_API_ID_hipArrayCreate: return "hipArrayCreate";
- case HIP_API_ID_hipStreamAttachMemAsync: return "hipStreamAttachMemAsync";
- case HIP_API_ID_hipStreamGetFlags: return "hipStreamGetFlags";
- case HIP_API_ID_hipMallocArray: return "hipMallocArray";
- case HIP_API_ID_hipCtxGetSharedMemConfig: return "hipCtxGetSharedMemConfig";
- case HIP_API_ID_hipDeviceDisablePeerAccess: return "hipDeviceDisablePeerAccess";
- case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: return "hipModuleOccupancyMaxPotentialBlockSize";
- case HIP_API_ID_hipMemPtrGetInfo: return "hipMemPtrGetInfo";
- case HIP_API_ID_hipFuncGetAttribute: return "hipFuncGetAttribute";
- case HIP_API_ID_hipCtxGetFlags: return "hipCtxGetFlags";
- case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy";
- case HIP_API_ID___hipPushCallConfiguration: return "__hipPushCallConfiguration";
- case HIP_API_ID_hipMemset3DAsync: return "hipMemset3DAsync";
- case HIP_API_ID_hipDeviceGetPCIBusId: return "hipDeviceGetPCIBusId";
- case HIP_API_ID_hipInit: return "hipInit";
- case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH";
- case HIP_API_ID_hipStreamGetPriority: return "hipStreamGetPriority";
- case HIP_API_ID_hipMemset2D: return "hipMemset2D";
- case HIP_API_ID_hipMemset2DAsync: return "hipMemset2DAsync";
- case HIP_API_ID_hipDeviceCanAccessPeer: return "hipDeviceCanAccessPeer";
- case HIP_API_ID_hipLaunchByPtr: return "hipLaunchByPtr";
- case HIP_API_ID_hipMemPrefetchAsync: return "hipMemPrefetchAsync";
- case HIP_API_ID_hipCtxDestroy: return "hipCtxDestroy";
- case HIP_API_ID_hipMemsetD16Async: return "hipMemsetD16Async";
- case HIP_API_ID_hipModuleUnload: return "hipModuleUnload";
- case HIP_API_ID_hipHostUnregister: return "hipHostUnregister";
- case HIP_API_ID_hipProfilerStop: return "hipProfilerStop";
- case HIP_API_ID_hipExtStreamCreateWithCUMask: return "hipExtStreamCreateWithCUMask";
- case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize";
- case HIP_API_ID_hipFreeHost: return "hipFreeHost";
- case HIP_API_ID_hipDeviceSetCacheConfig: return "hipDeviceSetCacheConfig";
- case HIP_API_ID_hipGetErrorName: return "hipGetErrorName";
- case HIP_API_ID_hipMemcpyHtoD: return "hipMemcpyHtoD";
- case HIP_API_ID_hipModuleGetGlobal: return "hipModuleGetGlobal";
- case HIP_API_ID_hipMemcpyHtoA: return "hipMemcpyHtoA";
- case HIP_API_ID_hipCtxCreate: return "hipCtxCreate";
- case HIP_API_ID_hipMemcpy2D: return "hipMemcpy2D";
- case HIP_API_ID_hipIpcCloseMemHandle: return "hipIpcCloseMemHandle";
- case HIP_API_ID_hipChooseDevice: return "hipChooseDevice";
- case HIP_API_ID_hipDeviceSetSharedMemConfig: return "hipDeviceSetSharedMemConfig";
- case HIP_API_ID_hipMallocMipmappedArray: return "hipMallocMipmappedArray";
- case HIP_API_ID_hipSetupArgument: return "hipSetupArgument";
- case HIP_API_ID_hipIpcGetEventHandle: return "hipIpcGetEventHandle";
- case HIP_API_ID_hipFreeArray: return "hipFreeArray";
- case HIP_API_ID_hipCtxSetCacheConfig: return "hipCtxSetCacheConfig";
- case HIP_API_ID_hipFuncSetCacheConfig: return "hipFuncSetCacheConfig";
- case HIP_API_ID_hipLaunchKernel: return "hipLaunchKernel";
- case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags";
- case HIP_API_ID_hipModuleGetTexRef: return "hipModuleGetTexRef";
- case HIP_API_ID_hipFuncSetAttribute: return "hipFuncSetAttribute";
- case HIP_API_ID_hipEventElapsedTime: return "hipEventElapsedTime";
- case HIP_API_ID_hipConfigureCall: return "hipConfigureCall";
- case HIP_API_ID_hipMemAdvise: return "hipMemAdvise";
- case HIP_API_ID_hipMemcpy3DAsync: return "hipMemcpy3DAsync";
- case HIP_API_ID_hipEventDestroy: return "hipEventDestroy";
- case HIP_API_ID_hipCtxPopCurrent: return "hipCtxPopCurrent";
- case HIP_API_ID_hipGetSymbolAddress: return "hipGetSymbolAddress";
- case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags";
- case HIP_API_ID_hipHostMalloc: return "hipHostMalloc";
- case HIP_API_ID_hipCtxSetSharedMemConfig: return "hipCtxSetSharedMemConfig";
- case HIP_API_ID_hipFreeMipmappedArray: return "hipFreeMipmappedArray";
- case HIP_API_ID_hipMemGetInfo: return "hipMemGetInfo";
- case HIP_API_ID_hipDeviceReset: return "hipDeviceReset";
- case HIP_API_ID_hipMemset: return "hipMemset";
- case HIP_API_ID_hipMemsetD8: return "hipMemsetD8";
- case HIP_API_ID_hipMemcpyParam2DAsync: return "hipMemcpyParam2DAsync";
- case HIP_API_ID_hipHostRegister: return "hipHostRegister";
- case HIP_API_ID_hipDriverGetVersion: return "hipDriverGetVersion";
- case HIP_API_ID_hipArray3DCreate: return "hipArray3DCreate";
- case HIP_API_ID_hipIpcOpenMemHandle: return "hipIpcOpenMemHandle";
- case HIP_API_ID_hipGetLastError: return "hipGetLastError";
- case HIP_API_ID_hipGetDeviceFlags: return "hipGetDeviceFlags";
- case HIP_API_ID_hipDeviceGetSharedMemConfig: return "hipDeviceGetSharedMemConfig";
- case HIP_API_ID_hipDrvMemcpy3D: return "hipDrvMemcpy3D";
- case HIP_API_ID_hipMemcpy2DFromArray: return "hipMemcpy2DFromArray";
- case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags";
- case HIP_API_ID_hipSetDeviceFlags: return "hipSetDeviceFlags";
- case HIP_API_ID_hipHccModuleLaunchKernel: return "hipHccModuleLaunchKernel";
- case HIP_API_ID_hipFree: return "hipFree";
- case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: return "hipOccupancyMaxPotentialBlockSize";
- case HIP_API_ID_hipDeviceGetAttribute: return "hipDeviceGetAttribute";
- case HIP_API_ID_hipDeviceComputeCapability: return "hipDeviceComputeCapability";
- case HIP_API_ID_hipCtxDisablePeerAccess: return "hipCtxDisablePeerAccess";
- case HIP_API_ID_hipMallocManaged: return "hipMallocManaged";
- case HIP_API_ID_hipDeviceGetByPCIBusId: return "hipDeviceGetByPCIBusId";
- case HIP_API_ID_hipIpcGetMemHandle: return "hipIpcGetMemHandle";
- case HIP_API_ID_hipMemcpyHtoDAsync: return "hipMemcpyHtoDAsync";
- case HIP_API_ID_hipCtxGetDevice: return "hipCtxGetDevice";
- case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD";
- case HIP_API_ID_hipModuleLoadData: return "hipModuleLoadData";
- case HIP_API_ID_hipDevicePrimaryCtxRelease: return "hipDevicePrimaryCtxRelease";
- case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: return "hipOccupancyMaxActiveBlocksPerMultiprocessor";
- case HIP_API_ID_hipCtxSetCurrent: return "hipCtxSetCurrent";
- case HIP_API_ID_hipGetErrorString: return "hipGetErrorString";
- case HIP_API_ID_hipStreamCreate: return "hipStreamCreate";
- case HIP_API_ID_hipDevicePrimaryCtxRetain: return "hipDevicePrimaryCtxRetain";
- case HIP_API_ID_hipDeviceGet: return "hipDeviceGet";
- case HIP_API_ID_hipStreamCreateWithFlags: return "hipStreamCreateWithFlags";
- case HIP_API_ID_hipMemcpyFromArray: return "hipMemcpyFromArray";
- case HIP_API_ID_hipMemcpy2DAsync: return "hipMemcpy2DAsync";
- case HIP_API_ID_hipFuncGetAttributes: return "hipFuncGetAttributes";
- case HIP_API_ID_hipGetSymbolSize: return "hipGetSymbolSize";
- case HIP_API_ID_hipHostFree: return "hipHostFree";
- case HIP_API_ID_hipEventCreateWithFlags: return "hipEventCreateWithFlags";
- case HIP_API_ID_hipStreamQuery: return "hipStreamQuery";
- case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D";
- case HIP_API_ID_hipMemcpyToSymbol: return "hipMemcpyToSymbol";
- case HIP_API_ID_hipMemcpy: return "hipMemcpy";
- case HIP_API_ID_hipPeekAtLastError: return "hipPeekAtLastError";
- case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice";
- case HIP_API_ID_hipHostAlloc: return "hipHostAlloc";
- case HIP_API_ID_hipStreamAddCallback: return "hipStreamAddCallback";
- case HIP_API_ID_hipMemcpyToArray: return "hipMemcpyToArray";
- case HIP_API_ID_hipMemsetD32: return "hipMemsetD32";
- case HIP_API_ID_hipExtModuleLaunchKernel: return "hipExtModuleLaunchKernel";
- case HIP_API_ID_hipDeviceSynchronize: return "hipDeviceSynchronize";
- case HIP_API_ID_hipDeviceGetCacheConfig: return "hipDeviceGetCacheConfig";
- case HIP_API_ID_hipMalloc3D: return "hipMalloc3D";
- case HIP_API_ID_hipPointerGetAttributes: return "hipPointerGetAttributes";
- case HIP_API_ID_hipMemsetAsync: return "hipMemsetAsync";
- case HIP_API_ID_hipDeviceGetName: return "hipDeviceGetName";
- case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: return "hipModuleOccupancyMaxPotentialBlockSizeWithFlags";
- case HIP_API_ID_hipCtxPushCurrent: return "hipCtxPushCurrent";
- case HIP_API_ID_hipMemcpyPeer: return "hipMemcpyPeer";
- case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize";
- case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync";
- case HIP_API_ID_hipProfilerStart: return "hipProfilerStart";
- case HIP_API_ID_hipExtMallocWithFlags: return "hipExtMallocWithFlags";
- case HIP_API_ID_hipCtxEnablePeerAccess: return "hipCtxEnablePeerAccess";
- case HIP_API_ID_hipMemAllocHost: return "hipMemAllocHost";
- case HIP_API_ID_hipMemcpyDtoHAsync: return "hipMemcpyDtoHAsync";
- case HIP_API_ID_hipModuleLaunchKernel: return "hipModuleLaunchKernel";
- case HIP_API_ID_hipMemAllocPitch: return "hipMemAllocPitch";
- case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel";
- case HIP_API_ID_hipMemcpy2DFromArrayAsync: return "hipMemcpy2DFromArrayAsync";
- case HIP_API_ID_hipDeviceGetLimit: return "hipDeviceGetLimit";
- case HIP_API_ID_hipModuleLoadDataEx: return "hipModuleLoadDataEx";
- case HIP_API_ID_hipRuntimeGetVersion: return "hipRuntimeGetVersion";
- case HIP_API_ID_hipMemRangeGetAttribute: return "hipMemRangeGetAttribute";
- case HIP_API_ID_hipDeviceGetP2PAttribute: return "hipDeviceGetP2PAttribute";
- case HIP_API_ID_hipMemcpyPeerAsync: return "hipMemcpyPeerAsync";
- case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties";
- case HIP_API_ID_hipMemcpyDtoH: return "hipMemcpyDtoH";
- case HIP_API_ID_hipMemcpyWithStream: return "hipMemcpyWithStream";
- case HIP_API_ID_hipDeviceTotalMem: return "hipDeviceTotalMem";
- case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer";
- case HIP_API_ID_hipMemRangeGetAttributes: return "hipMemRangeGetAttributes";
- case HIP_API_ID_hipMemcpyParam2D: return "hipMemcpyParam2D";
- case HIP_API_ID_hipDevicePrimaryCtxReset: return "hipDevicePrimaryCtxReset";
- case HIP_API_ID_hipGetMipmappedArrayLevel: return "hipGetMipmappedArrayLevel";
- case HIP_API_ID_hipMemsetD32Async: return "hipMemsetD32Async";
- case HIP_API_ID_hipGetDevice: return "hipGetDevice";
- case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount";
- case HIP_API_ID_hipIpcOpenEventHandle: return "hipIpcOpenEventHandle";
- };
- return "unknown";
-};
-
-#include <string.h>
-// Return HIP API ID by given name
-static inline uint32_t hipApiIdByName(const char* name) {
- if (strcmp("hipDrvMemcpy3DAsync", name) == 0) return HIP_API_ID_hipDrvMemcpy3DAsync;
- if (strcmp("hipDeviceEnablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceEnablePeerAccess;
- if (strcmp("hipFuncSetSharedMemConfig", name) == 0) return HIP_API_ID_hipFuncSetSharedMemConfig;
- if (strcmp("hipMemcpyToSymbolAsync", name) == 0) return HIP_API_ID_hipMemcpyToSymbolAsync;
- if (strcmp("hipMallocPitch", name) == 0) return HIP_API_ID_hipMallocPitch;
- if (strcmp("hipMalloc", name) == 0) return HIP_API_ID_hipMalloc;
- if (strcmp("hipMemsetD16", name) == 0) return HIP_API_ID_hipMemsetD16;
- if (strcmp("hipExtStreamGetCUMask", name) == 0) return HIP_API_ID_hipExtStreamGetCUMask;
- if (strcmp("hipEventRecord", name) == 0) return HIP_API_ID_hipEventRecord;
- if (strcmp("hipCtxSynchronize", name) == 0) return HIP_API_ID_hipCtxSynchronize;
- if (strcmp("hipSetDevice", name) == 0) return HIP_API_ID_hipSetDevice;
- if (strcmp("hipCtxGetApiVersion", name) == 0) return HIP_API_ID_hipCtxGetApiVersion;
- if (strcmp("hipMemcpyFromSymbolAsync", name) == 0) return HIP_API_ID_hipMemcpyFromSymbolAsync;
- if (strcmp("hipExtGetLinkTypeAndHopCount", name) == 0) return HIP_API_ID_hipExtGetLinkTypeAndHopCount;
- if (strcmp("__hipPopCallConfiguration", name) == 0) return HIP_API_ID___hipPopCallConfiguration;
- if (strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor;
- if (strcmp("hipMemset3D", name) == 0) return HIP_API_ID_hipMemset3D;
- if (strcmp("hipStreamCreateWithPriority", name) == 0) return HIP_API_ID_hipStreamCreateWithPriority;
- if (strcmp("hipMemcpy2DToArray", name) == 0) return HIP_API_ID_hipMemcpy2DToArray;
- if (strcmp("hipMemsetD8Async", name) == 0) return HIP_API_ID_hipMemsetD8Async;
- if (strcmp("hipCtxGetCacheConfig", name) == 0) return HIP_API_ID_hipCtxGetCacheConfig;
- if (strcmp("hipModuleGetFunction", name) == 0) return HIP_API_ID_hipModuleGetFunction;
- if (strcmp("hipStreamWaitEvent", name) == 0) return HIP_API_ID_hipStreamWaitEvent;
- if (strcmp("hipDeviceGetStreamPriorityRange", name) == 0) return HIP_API_ID_hipDeviceGetStreamPriorityRange;
- if (strcmp("hipModuleLoad", name) == 0) return HIP_API_ID_hipModuleLoad;
- if (strcmp("hipDevicePrimaryCtxSetFlags", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxSetFlags;
- if (strcmp("hipLaunchCooperativeKernel", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernel;
- if (strcmp("hipLaunchCooperativeKernelMultiDevice", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernelMultiDevice;
- if (strcmp("hipMemcpyAsync", name) == 0) return HIP_API_ID_hipMemcpyAsync;
- if (strcmp("hipMalloc3DArray", name) == 0) return HIP_API_ID_hipMalloc3DArray;
- if (strcmp("hipMallocHost", name) == 0) return HIP_API_ID_hipMallocHost;
- if (strcmp("hipCtxGetCurrent", name) == 0) return HIP_API_ID_hipCtxGetCurrent;
- if (strcmp("hipDevicePrimaryCtxGetState", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxGetState;
- if (strcmp("hipEventQuery", name) == 0) return HIP_API_ID_hipEventQuery;
- if (strcmp("hipEventCreate", name) == 0) return HIP_API_ID_hipEventCreate;
- if (strcmp("hipMemGetAddressRange", name) == 0) return HIP_API_ID_hipMemGetAddressRange;
- if (strcmp("hipMemcpyFromSymbol", name) == 0) return HIP_API_ID_hipMemcpyFromSymbol;
- if (strcmp("hipArrayCreate", name) == 0) return HIP_API_ID_hipArrayCreate;
- if (strcmp("hipStreamAttachMemAsync", name) == 0) return HIP_API_ID_hipStreamAttachMemAsync;
- if (strcmp("hipStreamGetFlags", name) == 0) return HIP_API_ID_hipStreamGetFlags;
- if (strcmp("hipMallocArray", name) == 0) return HIP_API_ID_hipMallocArray;
- if (strcmp("hipCtxGetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxGetSharedMemConfig;
- if (strcmp("hipDeviceDisablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceDisablePeerAccess;
- if (strcmp("hipModuleOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize;
- if (strcmp("hipMemPtrGetInfo", name) == 0) return HIP_API_ID_hipMemPtrGetInfo;
- if (strcmp("hipFuncGetAttribute", name) == 0) return HIP_API_ID_hipFuncGetAttribute;
- if (strcmp("hipCtxGetFlags", name) == 0) return HIP_API_ID_hipCtxGetFlags;
- if (strcmp("hipStreamDestroy", name) == 0) return HIP_API_ID_hipStreamDestroy;
- if (strcmp("__hipPushCallConfiguration", name) == 0) return HIP_API_ID___hipPushCallConfiguration;
- if (strcmp("hipMemset3DAsync", name) == 0) return HIP_API_ID_hipMemset3DAsync;
- if (strcmp("hipDeviceGetPCIBusId", name) == 0) return HIP_API_ID_hipDeviceGetPCIBusId;
- if (strcmp("hipInit", name) == 0) return HIP_API_ID_hipInit;
- if (strcmp("hipMemcpyAtoH", name) == 0) return HIP_API_ID_hipMemcpyAtoH;
- if (strcmp("hipStreamGetPriority", name) == 0) return HIP_API_ID_hipStreamGetPriority;
- if (strcmp("hipMemset2D", name) == 0) return HIP_API_ID_hipMemset2D;
- if (strcmp("hipMemset2DAsync", name) == 0) return HIP_API_ID_hipMemset2DAsync;
- if (strcmp("hipDeviceCanAccessPeer", name) == 0) return HIP_API_ID_hipDeviceCanAccessPeer;
- if (strcmp("hipLaunchByPtr", name) == 0) return HIP_API_ID_hipLaunchByPtr;
- if (strcmp("hipMemPrefetchAsync", name) == 0) return HIP_API_ID_hipMemPrefetchAsync;
- if (strcmp("hipCtxDestroy", name) == 0) return HIP_API_ID_hipCtxDestroy;
- if (strcmp("hipMemsetD16Async", name) == 0) return HIP_API_ID_hipMemsetD16Async;
- if (strcmp("hipModuleUnload", name) == 0) return HIP_API_ID_hipModuleUnload;
- if (strcmp("hipHostUnregister", name) == 0) return HIP_API_ID_hipHostUnregister;
- if (strcmp("hipProfilerStop", name) == 0) return HIP_API_ID_hipProfilerStop;
- if (strcmp("hipExtStreamCreateWithCUMask", name) == 0) return HIP_API_ID_hipExtStreamCreateWithCUMask;
- if (strcmp("hipStreamSynchronize", name) == 0) return HIP_API_ID_hipStreamSynchronize;
- if (strcmp("hipFreeHost", name) == 0) return HIP_API_ID_hipFreeHost;
- if (strcmp("hipDeviceSetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceSetCacheConfig;
- if (strcmp("hipGetErrorName", name) == 0) return HIP_API_ID_hipGetErrorName;
- if (strcmp("hipMemcpyHtoD", name) == 0) return HIP_API_ID_hipMemcpyHtoD;
- if (strcmp("hipModuleGetGlobal", name) == 0) return HIP_API_ID_hipModuleGetGlobal;
- if (strcmp("hipMemcpyHtoA", name) == 0) return HIP_API_ID_hipMemcpyHtoA;
- if (strcmp("hipCtxCreate", name) == 0) return HIP_API_ID_hipCtxCreate;
- if (strcmp("hipMemcpy2D", name) == 0) return HIP_API_ID_hipMemcpy2D;
- if (strcmp("hipIpcCloseMemHandle", name) == 0) return HIP_API_ID_hipIpcCloseMemHandle;
- if (strcmp("hipChooseDevice", name) == 0) return HIP_API_ID_hipChooseDevice;
- if (strcmp("hipDeviceSetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceSetSharedMemConfig;
- if (strcmp("hipMallocMipmappedArray", name) == 0) return HIP_API_ID_hipMallocMipmappedArray;
- if (strcmp("hipSetupArgument", name) == 0) return HIP_API_ID_hipSetupArgument;
- if (strcmp("hipIpcGetEventHandle", name) == 0) return HIP_API_ID_hipIpcGetEventHandle;
- if (strcmp("hipFreeArray", name) == 0) return HIP_API_ID_hipFreeArray;
- if (strcmp("hipCtxSetCacheConfig", name) == 0) return HIP_API_ID_hipCtxSetCacheConfig;
- if (strcmp("hipFuncSetCacheConfig", name) == 0) return HIP_API_ID_hipFuncSetCacheConfig;
- if (strcmp("hipLaunchKernel", name) == 0) return HIP_API_ID_hipLaunchKernel;
- if (strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
- if (strcmp("hipModuleGetTexRef", name) == 0) return HIP_API_ID_hipModuleGetTexRef;
- if (strcmp("hipFuncSetAttribute", name) == 0) return HIP_API_ID_hipFuncSetAttribute;
- if (strcmp("hipEventElapsedTime", name) == 0) return HIP_API_ID_hipEventElapsedTime;
- if (strcmp("hipConfigureCall", name) == 0) return HIP_API_ID_hipConfigureCall;
- if (strcmp("hipMemAdvise", name) == 0) return HIP_API_ID_hipMemAdvise;
- if (strcmp("hipMemcpy3DAsync", name) == 0) return HIP_API_ID_hipMemcpy3DAsync;
- if (strcmp("hipEventDestroy", name) == 0) return HIP_API_ID_hipEventDestroy;
- if (strcmp("hipCtxPopCurrent", name) == 0) return HIP_API_ID_hipCtxPopCurrent;
- if (strcmp("hipGetSymbolAddress", name) == 0) return HIP_API_ID_hipGetSymbolAddress;
- if (strcmp("hipHostGetFlags", name) == 0) return HIP_API_ID_hipHostGetFlags;
- if (strcmp("hipHostMalloc", name) == 0) return HIP_API_ID_hipHostMalloc;
- if (strcmp("hipCtxSetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxSetSharedMemConfig;
- if (strcmp("hipFreeMipmappedArray", name) == 0) return HIP_API_ID_hipFreeMipmappedArray;
- if (strcmp("hipMemGetInfo", name) == 0) return HIP_API_ID_hipMemGetInfo;
- if (strcmp("hipDeviceReset", name) == 0) return HIP_API_ID_hipDeviceReset;
- if (strcmp("hipMemset", name) == 0) return HIP_API_ID_hipMemset;
- if (strcmp("hipMemsetD8", name) == 0) return HIP_API_ID_hipMemsetD8;
- if (strcmp("hipMemcpyParam2DAsync", name) == 0) return HIP_API_ID_hipMemcpyParam2DAsync;
- if (strcmp("hipHostRegister", name) == 0) return HIP_API_ID_hipHostRegister;
- if (strcmp("hipDriverGetVersion", name) == 0) return HIP_API_ID_hipDriverGetVersion;
- if (strcmp("hipArray3DCreate", name) == 0) return HIP_API_ID_hipArray3DCreate;
- if (strcmp("hipIpcOpenMemHandle", name) == 0) return HIP_API_ID_hipIpcOpenMemHandle;
- if (strcmp("hipGetLastError", name) == 0) return HIP_API_ID_hipGetLastError;
- if (strcmp("hipGetDeviceFlags", name) == 0) return HIP_API_ID_hipGetDeviceFlags;
- if (strcmp("hipDeviceGetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceGetSharedMemConfig;
- if (strcmp("hipDrvMemcpy3D", name) == 0) return HIP_API_ID_hipDrvMemcpy3D;
- if (strcmp("hipMemcpy2DFromArray", name) == 0) return HIP_API_ID_hipMemcpy2DFromArray;
- if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
- if (strcmp("hipSetDeviceFlags", name) == 0) return HIP_API_ID_hipSetDeviceFlags;
- if (strcmp("hipHccModuleLaunchKernel", name) == 0) return HIP_API_ID_hipHccModuleLaunchKernel;
- if (strcmp("hipFree", name) == 0) return HIP_API_ID_hipFree;
- if (strcmp("hipOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipOccupancyMaxPotentialBlockSize;
- if (strcmp("hipDeviceGetAttribute", name) == 0) return HIP_API_ID_hipDeviceGetAttribute;
- if (strcmp("hipDeviceComputeCapability", name) == 0) return HIP_API_ID_hipDeviceComputeCapability;
- if (strcmp("hipCtxDisablePeerAccess", name) == 0) return HIP_API_ID_hipCtxDisablePeerAccess;
- if (strcmp("hipMallocManaged", name) == 0) return HIP_API_ID_hipMallocManaged;
- if (strcmp("hipDeviceGetByPCIBusId", name) == 0) return HIP_API_ID_hipDeviceGetByPCIBusId;
- if (strcmp("hipIpcGetMemHandle", name) == 0) return HIP_API_ID_hipIpcGetMemHandle;
- if (strcmp("hipMemcpyHtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyHtoDAsync;
- if (strcmp("hipCtxGetDevice", name) == 0) return HIP_API_ID_hipCtxGetDevice;
- if (strcmp("hipMemcpyDtoD", name) == 0) return HIP_API_ID_hipMemcpyDtoD;
- if (strcmp("hipModuleLoadData", name) == 0) return HIP_API_ID_hipModuleLoadData;
- if (strcmp("hipDevicePrimaryCtxRelease", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRelease;
- if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor;
- if (strcmp("hipCtxSetCurrent", name) == 0) return HIP_API_ID_hipCtxSetCurrent;
- if (strcmp("hipGetErrorString", name) == 0) return HIP_API_ID_hipGetErrorString;
- if (strcmp("hipStreamCreate", name) == 0) return HIP_API_ID_hipStreamCreate;
- if (strcmp("hipDevicePrimaryCtxRetain", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRetain;
- if (strcmp("hipDeviceGet", name) == 0) return HIP_API_ID_hipDeviceGet;
- if (strcmp("hipStreamCreateWithFlags", name) == 0) return HIP_API_ID_hipStreamCreateWithFlags;
- if (strcmp("hipMemcpyFromArray", name) == 0) return HIP_API_ID_hipMemcpyFromArray;
- if (strcmp("hipMemcpy2DAsync", name) == 0) return HIP_API_ID_hipMemcpy2DAsync;
- if (strcmp("hipFuncGetAttributes", name) == 0) return HIP_API_ID_hipFuncGetAttributes;
- if (strcmp("hipGetSymbolSize", name) == 0) return HIP_API_ID_hipGetSymbolSize;
- if (strcmp("hipHostFree", name) == 0) return HIP_API_ID_hipHostFree;
- if (strcmp("hipEventCreateWithFlags", name) == 0) return HIP_API_ID_hipEventCreateWithFlags;
- if (strcmp("hipStreamQuery", name) == 0) return HIP_API_ID_hipStreamQuery;
- if (strcmp("hipMemcpy3D", name) == 0) return HIP_API_ID_hipMemcpy3D;
- if (strcmp("hipMemcpyToSymbol", name) == 0) return HIP_API_ID_hipMemcpyToSymbol;
- if (strcmp("hipMemcpy", name) == 0) return HIP_API_ID_hipMemcpy;
- if (strcmp("hipPeekAtLastError", name) == 0) return HIP_API_ID_hipPeekAtLastError;
- if (strcmp("hipExtLaunchMultiKernelMultiDevice", name) == 0) return HIP_API_ID_hipExtLaunchMultiKernelMultiDevice;
- if (strcmp("hipHostAlloc", name) == 0) return HIP_API_ID_hipHostAlloc;
- if (strcmp("hipStreamAddCallback", name) == 0) return HIP_API_ID_hipStreamAddCallback;
- if (strcmp("hipMemcpyToArray", name) == 0) return HIP_API_ID_hipMemcpyToArray;
- if (strcmp("hipMemsetD32", name) == 0) return HIP_API_ID_hipMemsetD32;
- if (strcmp("hipExtModuleLaunchKernel", name) == 0) return HIP_API_ID_hipExtModuleLaunchKernel;
- if (strcmp("hipDeviceSynchronize", name) == 0) return HIP_API_ID_hipDeviceSynchronize;
- if (strcmp("hipDeviceGetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceGetCacheConfig;
- if (strcmp("hipMalloc3D", name) == 0) return HIP_API_ID_hipMalloc3D;
- if (strcmp("hipPointerGetAttributes", name) == 0) return HIP_API_ID_hipPointerGetAttributes;
- if (strcmp("hipMemsetAsync", name) == 0) return HIP_API_ID_hipMemsetAsync;
- if (strcmp("hipDeviceGetName", name) == 0) return HIP_API_ID_hipDeviceGetName;
- if (strcmp("hipModuleOccupancyMaxPotentialBlockSizeWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags;
- if (strcmp("hipCtxPushCurrent", name) == 0) return HIP_API_ID_hipCtxPushCurrent;
- if (strcmp("hipMemcpyPeer", name) == 0) return HIP_API_ID_hipMemcpyPeer;
- if (strcmp("hipEventSynchronize", name) == 0) return HIP_API_ID_hipEventSynchronize;
- if (strcmp("hipMemcpyDtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoDAsync;
- if (strcmp("hipProfilerStart", name) == 0) return HIP_API_ID_hipProfilerStart;
- if (strcmp("hipExtMallocWithFlags", name) == 0) return HIP_API_ID_hipExtMallocWithFlags;
- if (strcmp("hipCtxEnablePeerAccess", name) == 0) return HIP_API_ID_hipCtxEnablePeerAccess;
- if (strcmp("hipMemAllocHost", name) == 0) return HIP_API_ID_hipMemAllocHost;
- if (strcmp("hipMemcpyDtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoHAsync;
- if (strcmp("hipModuleLaunchKernel", name) == 0) return HIP_API_ID_hipModuleLaunchKernel;
- if (strcmp("hipMemAllocPitch", name) == 0) return HIP_API_ID_hipMemAllocPitch;
- if (strcmp("hipExtLaunchKernel", name) == 0) return HIP_API_ID_hipExtLaunchKernel;
- if (strcmp("hipMemcpy2DFromArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DFromArrayAsync;
- if (strcmp("hipDeviceGetLimit", name) == 0) return HIP_API_ID_hipDeviceGetLimit;
- if (strcmp("hipModuleLoadDataEx", name) == 0) return HIP_API_ID_hipModuleLoadDataEx;
- if (strcmp("hipRuntimeGetVersion", name) == 0) return HIP_API_ID_hipRuntimeGetVersion;
- if (strcmp("hipMemRangeGetAttribute", name) == 0) return HIP_API_ID_hipMemRangeGetAttribute;
- if (strcmp("hipDeviceGetP2PAttribute", name) == 0) return HIP_API_ID_hipDeviceGetP2PAttribute;
- if (strcmp("hipMemcpyPeerAsync", name) == 0) return HIP_API_ID_hipMemcpyPeerAsync;
- if (strcmp("hipGetDeviceProperties", name) == 0) return HIP_API_ID_hipGetDeviceProperties;
- if (strcmp("hipMemcpyDtoH", name) == 0) return HIP_API_ID_hipMemcpyDtoH;
- if (strcmp("hipMemcpyWithStream", name) == 0) return HIP_API_ID_hipMemcpyWithStream;
- if (strcmp("hipDeviceTotalMem", name) == 0) return HIP_API_ID_hipDeviceTotalMem;
- if (strcmp("hipHostGetDevicePointer", name) == 0) return HIP_API_ID_hipHostGetDevicePointer;
- if (strcmp("hipMemRangeGetAttributes", name) == 0) return HIP_API_ID_hipMemRangeGetAttributes;
- if (strcmp("hipMemcpyParam2D", name) == 0) return HIP_API_ID_hipMemcpyParam2D;
- if (strcmp("hipDevicePrimaryCtxReset", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxReset;
- if (strcmp("hipGetMipmappedArrayLevel", name) == 0) return HIP_API_ID_hipGetMipmappedArrayLevel;
- if (strcmp("hipMemsetD32Async", name) == 0) return HIP_API_ID_hipMemsetD32Async;
- if (strcmp("hipGetDevice", name) == 0) return HIP_API_ID_hipGetDevice;
- if (strcmp("hipGetDeviceCount", name) == 0) return HIP_API_ID_hipGetDeviceCount;
- if (strcmp("hipIpcOpenEventHandle", name) == 0) return HIP_API_ID_hipIpcOpenEventHandle;
- return HIP_API_ID_NUMBER;
-}
-
-// HIP API callbacks data structure
-typedef struct hip_api_data_s {
- uint64_t correlation_id;
- uint32_t phase;
- union {
- struct {
- const HIP_MEMCPY3D* pCopy;
- HIP_MEMCPY3D pCopy__val;
- hipStream_t stream;
- } hipDrvMemcpy3DAsync;
- struct {
- int peerDeviceId;
- unsigned int flags;
- } hipDeviceEnablePeerAccess;
- struct {
- const void* func;
- hipSharedMemConfig config;
- } hipFuncSetSharedMemConfig;
- struct {
- const void* symbol;
- const void* src;
- size_t sizeBytes;
- size_t offset;
- hipMemcpyKind kind;
- hipStream_t stream;
- } hipMemcpyToSymbolAsync;
- struct {
- void** ptr;
- void* ptr__val;
- size_t* pitch;
- size_t pitch__val;
- size_t width;
- size_t height;
- } hipMallocPitch;
- struct {
- void** ptr;
- void* ptr__val;
- size_t size;
- } hipMalloc;
- struct {
- hipDeviceptr_t dest;
- unsigned short value;
- size_t count;
- } hipMemsetD16;
- struct {
- hipStream_t stream;
- unsigned int cuMaskSize;
- unsigned int* cuMask;
- unsigned int cuMask__val;
- } hipExtStreamGetCUMask;
- struct {
- hipEvent_t event;
- hipStream_t stream;
- } hipEventRecord;
- struct {
- int deviceId;
- } hipSetDevice;
- struct {
- hipCtx_t ctx;
- int* apiVersion;
- int apiVersion__val;
- } hipCtxGetApiVersion;
- struct {
- void* dst;
- const void* symbol;
- size_t sizeBytes;
- size_t offset;
- hipMemcpyKind kind;
- hipStream_t stream;
- } hipMemcpyFromSymbolAsync;
- struct {
- int device1;
- int device2;
- unsigned int* linktype;
- unsigned int linktype__val;
- unsigned int* hopcount;
- unsigned int hopcount__val;
- } hipExtGetLinkTypeAndHopCount;
- struct {
- dim3* gridDim;
- dim3 gridDim__val;
- dim3* blockDim;
- dim3 blockDim__val;
- size_t* sharedMem;
- size_t sharedMem__val;
- hipStream_t* stream;
- hipStream_t stream__val;
- } __hipPopCallConfiguration;
- struct {
- int* numBlocks;
- int numBlocks__val;
- hipFunction_t f;
- int blockSize;
- size_t dynSharedMemPerBlk;
- } hipModuleOccupancyMaxActiveBlocksPerMultiprocessor;
- struct {
- hipPitchedPtr pitchedDevPtr;
- int value;
- hipExtent extent;
- } hipMemset3D;
- struct {
- hipStream_t* stream;
- hipStream_t stream__val;
- unsigned int flags;
- int priority;
- } hipStreamCreateWithPriority;
- struct {
- hipArray* dst;
- hipArray dst__val;
- size_t wOffset;
- size_t hOffset;
- const void* src;
- size_t spitch;
- size_t width;
- size_t height;
- hipMemcpyKind kind;
- } hipMemcpy2DToArray;
- struct {
- hipDeviceptr_t dest;
- unsigned char value;
- size_t count;
- hipStream_t stream;
- } hipMemsetD8Async;
- struct {
- hipFuncCache_t* cacheConfig;
- hipFuncCache_t cacheConfig__val;
- } hipCtxGetCacheConfig;
- struct {
- hipFunction_t* function;
- hipFunction_t function__val;
- hipModule_t module;
- const char* kname;
- char kname__val;
- } hipModuleGetFunction;
- struct {
- hipStream_t stream;
- hipEvent_t event;
- unsigned int flags;
- } hipStreamWaitEvent;
- struct {
- int* leastPriority;
- int leastPriority__val;
- int* greatestPriority;
- int greatestPriority__val;
- } hipDeviceGetStreamPriorityRange;
- struct {
- hipModule_t* module;
- hipModule_t module__val;
- const char* fname;
- char fname__val;
- } hipModuleLoad;
- struct {
- hipDevice_t dev;
- unsigned int flags;
- } hipDevicePrimaryCtxSetFlags;
- struct {
- const void* f;
- dim3 gridDim;
- dim3 blockDimX;
- void** kernelParams;
- void* kernelParams__val;
- unsigned int sharedMemBytes;
- hipStream_t stream;
- } hipLaunchCooperativeKernel;
- struct {
- hipLaunchParams* launchParamsList;
- hipLaunchParams launchParamsList__val;
- int numDevices;
- unsigned int flags;
- } hipLaunchCooperativeKernelMultiDevice;
- struct {
- void* dst;
- const void* src;
- size_t sizeBytes;
- hipMemcpyKind kind;
- hipStream_t stream;
- } hipMemcpyAsync;
- struct {
- hipArray_t* array;
- hipArray_t array__val;
- const hipChannelFormatDesc* desc;
- hipChannelFormatDesc desc__val;
- hipExtent extent;
- unsigned int flags;
- } hipMalloc3DArray;
- struct {
- void** ptr;
- void* ptr__val;
- size_t size;
- } hipMallocHost;
- struct {
- hipCtx_t* ctx;
- hipCtx_t ctx__val;
- } hipCtxGetCurrent;
- struct {
- hipDevice_t dev;
- unsigned int* flags;
- unsigned int flags__val;
- int* active;
- int active__val;
- } hipDevicePrimaryCtxGetState;
- struct {
- hipEvent_t event;
- } hipEventQuery;
- struct {
- hipEvent_t* event;
- hipEvent_t event__val;
- } hipEventCreate;
- struct {
- hipDeviceptr_t* pbase;
- hipDeviceptr_t pbase__val;
- size_t* psize;
- size_t psize__val;
- hipDeviceptr_t dptr;
- } hipMemGetAddressRange;
- struct {
- void* dst;
- const void* symbol;
- size_t sizeBytes;
- size_t offset;
- hipMemcpyKind kind;
- } hipMemcpyFromSymbol;
- struct {
- hipArray** pHandle;
- hipArray* pHandle__val;
- const HIP_ARRAY_DESCRIPTOR* pAllocateArray;
- HIP_ARRAY_DESCRIPTOR pAllocateArray__val;
- } hipArrayCreate;
- struct {
- hipStream_t stream;
- hipDeviceptr_t* dev_ptr;
- hipDeviceptr_t dev_ptr__val;
- size_t length;
- unsigned int flags;
- } hipStreamAttachMemAsync;
- struct {
- hipStream_t stream;
- unsigned int* flags;
- unsigned int flags__val;
- } hipStreamGetFlags;
- struct {
- hipArray** array;
- hipArray* array__val;
- const hipChannelFormatDesc* desc;
- hipChannelFormatDesc desc__val;
- size_t width;
- size_t height;
- unsigned int flags;
- } hipMallocArray;
- struct {
- hipSharedMemConfig* pConfig;
- hipSharedMemConfig pConfig__val;
- } hipCtxGetSharedMemConfig;
- struct {
- int peerDeviceId;
- } hipDeviceDisablePeerAccess;
- struct {
- int* gridSize;
- int gridSize__val;
- int* blockSize;
- int blockSize__val;
- hipFunction_t f;
- size_t dynSharedMemPerBlk;
- int blockSizeLimit;
- } hipModuleOccupancyMaxPotentialBlockSize;
- struct {
- void* ptr;
- size_t* size;
- size_t size__val;
- } hipMemPtrGetInfo;
- struct {
- int* value;
- int value__val;
- hipFunction_attribute attrib;
- hipFunction_t hfunc;
- } hipFuncGetAttribute;
- struct {
- unsigned int* flags;
- unsigned int flags__val;
- } hipCtxGetFlags;
- struct {
- hipStream_t stream;
- } hipStreamDestroy;
- struct {
- dim3 gridDim;
- dim3 blockDim;
- size_t sharedMem;
- hipStream_t stream;
- } __hipPushCallConfiguration;
- struct {
- hipPitchedPtr pitchedDevPtr;
- int value;
- hipExtent extent;
- hipStream_t stream;
- } hipMemset3DAsync;
- struct {
- char* pciBusId;
- char pciBusId__val;
- int len;
- int device;
- } hipDeviceGetPCIBusId;
- struct {
- unsigned int flags;
- } hipInit;
- struct {
- void* dst;
- hipArray* srcArray;
- hipArray srcArray__val;
- size_t srcOffset;
- size_t count;
- } hipMemcpyAtoH;
- struct {
- hipStream_t stream;
- int* priority;
- int priority__val;
- } hipStreamGetPriority;
- struct {
- void* dst;
- size_t pitch;
- int value;
- size_t width;
- size_t height;
- } hipMemset2D;
- struct {
- void* dst;
- size_t pitch;
- int value;
- size_t width;
- size_t height;
- hipStream_t stream;
- } hipMemset2DAsync;
- struct {
- int* canAccessPeer;
- int canAccessPeer__val;
- int deviceId;
- int peerDeviceId;
- } hipDeviceCanAccessPeer;
- struct {
- const void* hostFunction;
- } hipLaunchByPtr;
- struct {
- const void* dev_ptr;
- size_t count;
- int device;
- hipStream_t stream;
- } hipMemPrefetchAsync;
- struct {
- hipCtx_t ctx;
- } hipCtxDestroy;
- struct {
- hipDeviceptr_t dest;
- unsigned short value;
- size_t count;
- hipStream_t stream;
- } hipMemsetD16Async;
- struct {
- hipModule_t module;
- } hipModuleUnload;
- struct {
- void* hostPtr;
- } hipHostUnregister;
- struct {
- hipStream_t* stream;
- hipStream_t stream__val;
- unsigned int cuMaskSize;
- const unsigned int* cuMask;
- unsigned int cuMask__val;
- } hipExtStreamCreateWithCUMask;
- struct {
- hipStream_t stream;
- } hipStreamSynchronize;
- struct {
- void* ptr;
- } hipFreeHost;
- struct {
- hipFuncCache_t cacheConfig;
- } hipDeviceSetCacheConfig;
- struct {
- hipDeviceptr_t dst;
- void* src;
- size_t sizeBytes;
- } hipMemcpyHtoD;
- struct {
- hipDeviceptr_t* dptr;
- hipDeviceptr_t dptr__val;
- size_t* bytes;
- size_t bytes__val;
- hipModule_t hmod;
- const char* name;
- char name__val;
- } hipModuleGetGlobal;
- struct {
- hipArray* dstArray;
- hipArray dstArray__val;
- size_t dstOffset;
- const void* srcHost;
- size_t count;
- } hipMemcpyHtoA;
- struct {
- hipCtx_t* ctx;
- hipCtx_t ctx__val;
- unsigned int flags;
- hipDevice_t device;
- } hipCtxCreate;
- struct {
- void* dst;
- size_t dpitch;
- const void* src;
- size_t spitch;
- size_t width;
- size_t height;
- hipMemcpyKind kind;
- } hipMemcpy2D;
- struct {
- void* devPtr;
- } hipIpcCloseMemHandle;
- struct {
- int* device;
- int device__val;
- const hipDeviceProp_t* prop;
- hipDeviceProp_t prop__val;
- } hipChooseDevice;
- struct {
- hipSharedMemConfig config;
- } hipDeviceSetSharedMemConfig;
- struct {
- hipMipmappedArray_t* mipmappedArray;
- hipMipmappedArray_t mipmappedArray__val;
- const hipChannelFormatDesc* desc;
- hipChannelFormatDesc desc__val;
- hipExtent extent;
- unsigned int numLevels;
- unsigned int flags;
- } hipMallocMipmappedArray;
- struct {
- const void* arg;
- size_t size;
- size_t offset;
- } hipSetupArgument;
- struct {
- hipIpcEventHandle_t* handle;
- hipIpcEventHandle_t handle__val;
- hipEvent_t event;
- } hipIpcGetEventHandle;
- struct {
- hipArray* array;
- hipArray array__val;
- } hipFreeArray;
- struct {
- hipFuncCache_t cacheConfig;
- } hipCtxSetCacheConfig;
- struct {
- const void* func;
- hipFuncCache_t config;
- } hipFuncSetCacheConfig;
- struct {
- const void* function_address;
- dim3 numBlocks;
- dim3 dimBlocks;
- void** args;
- void* args__val;
- size_t sharedMemBytes;
- hipStream_t stream;
- } hipLaunchKernel;
- struct {
- int* numBlocks;
- int numBlocks__val;
- hipFunction_t f;
- int blockSize;
- size_t dynSharedMemPerBlk;
- unsigned int flags;
- } hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
- struct {
- textureReference** texRef;
- textureReference* texRef__val;
- hipModule_t hmod;
- const char* name;
- char name__val;
- } hipModuleGetTexRef;
- struct {
- const void* func;
- hipFuncAttribute attr;
- int value;
- } hipFuncSetAttribute;
- struct {
- float* ms;
- float ms__val;
- hipEvent_t start;
- hipEvent_t stop;
- } hipEventElapsedTime;
- struct {
- dim3 gridDim;
- dim3 blockDim;
- size_t sharedMem;
- hipStream_t stream;
- } hipConfigureCall;
- struct {
- const void* dev_ptr;
- size_t count;
- hipMemoryAdvise advice;
- int device;
- } hipMemAdvise;
- struct {
- const hipMemcpy3DParms* p;
- hipMemcpy3DParms p__val;
- hipStream_t stream;
- } hipMemcpy3DAsync;
- struct {
- hipEvent_t event;
- } hipEventDestroy;
- struct {
- hipCtx_t* ctx;
- hipCtx_t ctx__val;
- } hipCtxPopCurrent;
- struct {
- void** devPtr;
- void* devPtr__val;
- const void* symbol;
- } hipGetSymbolAddress;
- struct {
- unsigned int* flagsPtr;
- unsigned int flagsPtr__val;
- void* hostPtr;
- } hipHostGetFlags;
- struct {
- void** ptr;
- void* ptr__val;
- size_t size;
- unsigned int flags;
- } hipHostMalloc;
- struct {
- hipSharedMemConfig config;
- } hipCtxSetSharedMemConfig;
- struct {
- hipMipmappedArray_t mipmappedArray;
- } hipFreeMipmappedArray;
- struct {
- size_t* free;
- size_t free__val;
- size_t* total;
- size_t total__val;
- } hipMemGetInfo;
- struct {
- void* dst;
- int value;
- size_t sizeBytes;
- } hipMemset;
- struct {
- hipDeviceptr_t dest;
- unsigned char value;
- size_t count;
- } hipMemsetD8;
- struct {
- const hip_Memcpy2D* pCopy;
- hip_Memcpy2D pCopy__val;
- hipStream_t stream;
- } hipMemcpyParam2DAsync;
- struct {
- void* hostPtr;
- size_t sizeBytes;
- unsigned int flags;
- } hipHostRegister;
- struct {
- int* driverVersion;
- int driverVersion__val;
- } hipDriverGetVersion;
- struct {
- hipArray** array;
- hipArray* array__val;
- const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray;
- HIP_ARRAY3D_DESCRIPTOR pAllocateArray__val;
- } hipArray3DCreate;
- struct {
- void** devPtr;
- void* devPtr__val;
- hipIpcMemHandle_t handle;
- unsigned int flags;
- } hipIpcOpenMemHandle;
- struct {
- unsigned int* flags;
- unsigned int flags__val;
- } hipGetDeviceFlags;
- struct {
- hipSharedMemConfig* pConfig;
- hipSharedMemConfig pConfig__val;
- } hipDeviceGetSharedMemConfig;
- struct {
- const HIP_MEMCPY3D* pCopy;
- HIP_MEMCPY3D pCopy__val;
- } hipDrvMemcpy3D;
- struct {
- void* dst;
- size_t dpitch;
- hipArray_const_t src;
- size_t wOffset;
- size_t hOffset;
- size_t width;
- size_t height;
- hipMemcpyKind kind;
- } hipMemcpy2DFromArray;
- struct {
- int* numBlocks;
- int numBlocks__val;
- const void* f;
- int blockSize;
- size_t dynamicSMemSize;
- unsigned int flags;
- } hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
- struct {
- unsigned int flags;
- } hipSetDeviceFlags;
- struct {
- hipFunction_t f;
- unsigned int globalWorkSizeX;
- unsigned int globalWorkSizeY;
- unsigned int globalWorkSizeZ;
- unsigned int blockDimX;
- unsigned int blockDimY;
- unsigned int blockDimZ;
- size_t sharedMemBytes;
- hipStream_t hStream;
- void** kernelParams;
- void* kernelParams__val;
- void** extra;
- void* extra__val;
- hipEvent_t startEvent;
- hipEvent_t stopEvent;
- } hipHccModuleLaunchKernel;
- struct {
- void* ptr;
- } hipFree;
- struct {
- int* gridSize;
- int gridSize__val;
- int* blockSize;
- int blockSize__val;
- const void* f;
- size_t dynSharedMemPerBlk;
- int blockSizeLimit;
- } hipOccupancyMaxPotentialBlockSize;
- struct {
- int* pi;
- int pi__val;
- hipDeviceAttribute_t attr;
- int deviceId;
- } hipDeviceGetAttribute;
- struct {
- int* major;
- int major__val;
- int* minor;
- int minor__val;
- hipDevice_t device;
- } hipDeviceComputeCapability;
- struct {
- hipCtx_t peerCtx;
- } hipCtxDisablePeerAccess;
- struct {
- void** dev_ptr;
- void* dev_ptr__val;
- size_t size;
- unsigned int flags;
- } hipMallocManaged;
- struct {
- int* device;
- int device__val;
- const char* pciBusId;
- char pciBusId__val;
- } hipDeviceGetByPCIBusId;
- struct {
- hipIpcMemHandle_t* handle;
- hipIpcMemHandle_t handle__val;
- void* devPtr;
- } hipIpcGetMemHandle;
- struct {
- hipDeviceptr_t dst;
- void* src;
- size_t sizeBytes;
- hipStream_t stream;
- } hipMemcpyHtoDAsync;
- struct {
- hipDevice_t* device;
- hipDevice_t device__val;
- } hipCtxGetDevice;
- struct {
- hipDeviceptr_t dst;
- hipDeviceptr_t src;
- size_t sizeBytes;
- } hipMemcpyDtoD;
- struct {
- hipModule_t* module;
- hipModule_t module__val;
- const void* image;
- } hipModuleLoadData;
- struct {
- hipDevice_t dev;
- } hipDevicePrimaryCtxRelease;
- struct {
- int* numBlocks;
- int numBlocks__val;
- const void* f;
- int blockSize;
- size_t dynamicSMemSize;
- } hipOccupancyMaxActiveBlocksPerMultiprocessor;
- struct {
- hipCtx_t ctx;
- } hipCtxSetCurrent;
- struct {
- hipStream_t* stream;
- hipStream_t stream__val;
- } hipStreamCreate;
- struct {
- hipCtx_t* pctx;
- hipCtx_t pctx__val;
- hipDevice_t dev;
- } hipDevicePrimaryCtxRetain;
- struct {
- hipDevice_t* device;
- hipDevice_t device__val;
- int ordinal;
- } hipDeviceGet;
- struct {
- hipStream_t* stream;
- hipStream_t stream__val;
- unsigned int flags;
- } hipStreamCreateWithFlags;
- struct {
- void* dst;
- hipArray_const_t srcArray;
- size_t wOffset;
- size_t hOffset;
- size_t count;
- hipMemcpyKind kind;
- } hipMemcpyFromArray;
- struct {
- void* dst;
- size_t dpitch;
- const void* src;
- size_t spitch;
- size_t width;
- size_t height;
- hipMemcpyKind kind;
- hipStream_t stream;
- } hipMemcpy2DAsync;
- struct {
- hipFuncAttributes* attr;
- hipFuncAttributes attr__val;
- const void* func;
- } hipFuncGetAttributes;
- struct {
- size_t* size;
- size_t size__val;
- const void* symbol;
- } hipGetSymbolSize;
- struct {
- void* ptr;
- } hipHostFree;
- struct {
- hipEvent_t* event;
- hipEvent_t event__val;
- unsigned int flags;
- } hipEventCreateWithFlags;
- struct {
- hipStream_t stream;
- } hipStreamQuery;
- struct {
- const hipMemcpy3DParms* p;
- hipMemcpy3DParms p__val;
- } hipMemcpy3D;
- struct {
- const void* symbol;
- const void* src;
- size_t sizeBytes;
- size_t offset;
- hipMemcpyKind kind;
- } hipMemcpyToSymbol;
- struct {
- void* dst;
- const void* src;
- size_t sizeBytes;
- hipMemcpyKind kind;
- } hipMemcpy;
- struct {
- hipLaunchParams* launchParamsList;
- hipLaunchParams launchParamsList__val;
- int numDevices;
- unsigned int flags;
- } hipExtLaunchMultiKernelMultiDevice;
- struct {
- void** ptr;
- void* ptr__val;
- size_t size;
- unsigned int flags;
- } hipHostAlloc;
- struct {
- hipStream_t stream;
- hipStreamCallback_t callback;
- void* userData;
- unsigned int flags;
- } hipStreamAddCallback;
- struct {
- hipArray* dst;
- hipArray dst__val;
- size_t wOffset;
- size_t hOffset;
- const void* src;
- size_t count;
- hipMemcpyKind kind;
- } hipMemcpyToArray;
- struct {
- hipDeviceptr_t dest;
- int value;
- size_t count;
- } hipMemsetD32;
- struct {
- hipFunction_t f;
- unsigned int globalWorkSizeX;
- unsigned int globalWorkSizeY;
- unsigned int globalWorkSizeZ;
- unsigned int localWorkSizeX;
- unsigned int localWorkSizeY;
- unsigned int localWorkSizeZ;
- size_t sharedMemBytes;
- hipStream_t hStream;
- void** kernelParams;
- void* kernelParams__val;
- void** extra;
- void* extra__val;
- hipEvent_t startEvent;
- hipEvent_t stopEvent;
- unsigned int flags;
- } hipExtModuleLaunchKernel;
- struct {
- hipFuncCache_t* cacheConfig;
- hipFuncCache_t cacheConfig__val;
- } hipDeviceGetCacheConfig;
- struct {
- hipPitchedPtr* pitchedDevPtr;
- hipPitchedPtr pitchedDevPtr__val;
- hipExtent extent;
- } hipMalloc3D;
- struct {
- hipPointerAttribute_t* attributes;
- hipPointerAttribute_t attributes__val;
- const void* ptr;
- } hipPointerGetAttributes;
- struct {
- void* dst;
- int value;
- size_t sizeBytes;
- hipStream_t stream;
- } hipMemsetAsync;
- struct {
- char* name;
- char name__val;
- int len;
- hipDevice_t device;
- } hipDeviceGetName;
- struct {
- int* gridSize;
- int gridSize__val;
- int* blockSize;
- int blockSize__val;
- hipFunction_t f;
- size_t dynSharedMemPerBlk;
- int blockSizeLimit;
- unsigned int flags;
- } hipModuleOccupancyMaxPotentialBlockSizeWithFlags;
- struct {
- hipCtx_t ctx;
- } hipCtxPushCurrent;
- struct {
- void* dst;
- int dstDeviceId;
- const void* src;
- int srcDeviceId;
- size_t sizeBytes;
- } hipMemcpyPeer;
- struct {
- hipEvent_t event;
- } hipEventSynchronize;
- struct {
- hipDeviceptr_t dst;
- hipDeviceptr_t src;
- size_t sizeBytes;
- hipStream_t stream;
- } hipMemcpyDtoDAsync;
- struct {
- void** ptr;
- void* ptr__val;
- size_t sizeBytes;
- unsigned int flags;
- } hipExtMallocWithFlags;
- struct {
- hipCtx_t peerCtx;
- unsigned int flags;
- } hipCtxEnablePeerAccess;
- struct {
- void** ptr;
- void* ptr__val;
- size_t size;
- } hipMemAllocHost;
- struct {
- void* dst;
- hipDeviceptr_t src;
- size_t sizeBytes;
- hipStream_t stream;
- } hipMemcpyDtoHAsync;
- struct {
- hipFunction_t f;
- unsigned int gridDimX;
- unsigned int gridDimY;
- unsigned int gridDimZ;
- unsigned int blockDimX;
- unsigned int blockDimY;
- unsigned int blockDimZ;
- unsigned int sharedMemBytes;
- hipStream_t stream;
- void** kernelParams;
- void* kernelParams__val;
- void** extra;
- void* extra__val;
- } hipModuleLaunchKernel;
- struct {
- hipDeviceptr_t* dptr;
- hipDeviceptr_t dptr__val;
- size_t* pitch;
- size_t pitch__val;
- size_t widthInBytes;
- size_t height;
- unsigned int elementSizeBytes;
- } hipMemAllocPitch;
- struct {
- const void* function_address;
- dim3 numBlocks;
- dim3 dimBlocks;
- void** args;
- void* args__val;
- size_t sharedMemBytes;
- hipStream_t stream;
- hipEvent_t startEvent;
- hipEvent_t stopEvent;
- int flags;
- } hipExtLaunchKernel;
- struct {
- void* dst;
- size_t dpitch;
- hipArray_const_t src;
- size_t wOffset;
- size_t hOffset;
- size_t width;
- size_t height;
- hipMemcpyKind kind;
- hipStream_t stream;
- } hipMemcpy2DFromArrayAsync;
- struct {
- size_t* pValue;
- size_t pValue__val;
- enum hipLimit_t limit;
- } hipDeviceGetLimit;
- struct {
- hipModule_t* module;
- hipModule_t module__val;
- const void* image;
- unsigned int numOptions;
- hipJitOption* options;
- hipJitOption options__val;
- void** optionsValues;
- void* optionsValues__val;
- } hipModuleLoadDataEx;
- struct {
- int* runtimeVersion;
- int runtimeVersion__val;
- } hipRuntimeGetVersion;
- struct {
- void* data;
- size_t data_size;
- hipMemRangeAttribute attribute;
- const void* dev_ptr;
- size_t count;
- } hipMemRangeGetAttribute;
- struct {
- int* value;
- int value__val;
- hipDeviceP2PAttr attr;
- int srcDevice;
- int dstDevice;
- } hipDeviceGetP2PAttribute;
- struct {
- void* dst;
- int dstDeviceId;
- const void* src;
- int srcDevice;
- size_t sizeBytes;
- hipStream_t stream;
- } hipMemcpyPeerAsync;
- struct {
- hipDeviceProp_t* props;
- hipDeviceProp_t props__val;
- hipDevice_t device;
- } hipGetDeviceProperties;
- struct {
- void* dst;
- hipDeviceptr_t src;
- size_t sizeBytes;
- } hipMemcpyDtoH;
- struct {
- void* dst;
- const void* src;
- size_t sizeBytes;
- hipMemcpyKind kind;
- hipStream_t stream;
- } hipMemcpyWithStream;
- struct {
- size_t* bytes;
- size_t bytes__val;
- hipDevice_t device;
- } hipDeviceTotalMem;
- struct {
- void** devPtr;
- void* devPtr__val;
- void* hstPtr;
- unsigned int flags;
- } hipHostGetDevicePointer;
- struct {
- void** data;
- void* data__val;
- size_t* data_sizes;
- size_t data_sizes__val;
- hipMemRangeAttribute* attributes;
- hipMemRangeAttribute attributes__val;
- size_t num_attributes;
- const void* dev_ptr;
- size_t count;
- } hipMemRangeGetAttributes;
- struct {
- const hip_Memcpy2D* pCopy;
- hip_Memcpy2D pCopy__val;
- } hipMemcpyParam2D;
- struct {
- hipDevice_t dev;
- } hipDevicePrimaryCtxReset;
- struct {
- hipArray_t* levelArray;
- hipArray_t levelArray__val;
- hipMipmappedArray_const_t mipmappedArray;
- unsigned int level;
- } hipGetMipmappedArrayLevel;
- struct {
- hipDeviceptr_t dst;
- int value;
- size_t count;
- hipStream_t stream;
- } hipMemsetD32Async;
- struct {
- int* deviceId;
- int deviceId__val;
- } hipGetDevice;
- struct {
- int* count;
- int count__val;
- } hipGetDeviceCount;
- struct {
- hipEvent_t* event;
- hipEvent_t event__val;
- hipIpcEventHandle_t handle;
- } hipIpcOpenEventHandle;
- } args;
-} hip_api_data_t;
-
-// HIP API callbacks args data filling macros
-// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')]
-#define INIT_hipDrvMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDrvMemcpy3DAsync.pCopy = (const HIP_MEMCPY3D*)pCopy; \
- cb_data.args.hipDrvMemcpy3DAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')]
-#define INIT_hipDeviceEnablePeerAccess_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceEnablePeerAccess.peerDeviceId = (int)peerDeviceId; \
- cb_data.args.hipDeviceEnablePeerAccess.flags = (unsigned int)flags; \
-};
-// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')]
-#define INIT_hipFuncSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFuncSetSharedMemConfig.func = (const void*)func; \
- cb_data.args.hipFuncSetSharedMemConfig.config = (hipSharedMemConfig)config; \
-};
-// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyToSymbolAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyToSymbolAsync.symbol = (const void*)symbol; \
- cb_data.args.hipMemcpyToSymbolAsync.src = (const void*)src; \
- cb_data.args.hipMemcpyToSymbolAsync.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpyToSymbolAsync.offset = (size_t)offset; \
- cb_data.args.hipMemcpyToSymbolAsync.kind = (hipMemcpyKind)kind; \
- cb_data.args.hipMemcpyToSymbolAsync.stream = (hipStream_t)stream; \
-};
-// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')]
-#define INIT_hipMallocPitch_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMallocPitch.ptr = (void**)ptr; \
- cb_data.args.hipMallocPitch.pitch = (size_t*)pitch; \
- cb_data.args.hipMallocPitch.width = (size_t)width; \
- cb_data.args.hipMallocPitch.height = (size_t)height; \
-};
-// hipMalloc[('void**', 'ptr'), ('size_t', 'size')]
-#define INIT_hipMalloc_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMalloc.ptr = (void**)ptr; \
- cb_data.args.hipMalloc.size = (size_t)sizeBytes; \
-};
-// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')]
-#define INIT_hipMemsetD16_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemsetD16.dest = (hipDeviceptr_t)dst; \
- cb_data.args.hipMemsetD16.value = (unsigned short)value; \
- cb_data.args.hipMemsetD16.count = (size_t)count; \
-};
-// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')]
-#define INIT_hipExtStreamGetCUMask_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipExtStreamGetCUMask.stream = (hipStream_t)stream; \
- cb_data.args.hipExtStreamGetCUMask.cuMaskSize = (unsigned int)cuMaskSize; \
- cb_data.args.hipExtStreamGetCUMask.cuMask = (unsigned int*)cuMask; \
-};
-// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')]
-#define INIT_hipEventRecord_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipEventRecord.event = (hipEvent_t)event; \
- cb_data.args.hipEventRecord.stream = (hipStream_t)stream; \
-};
-// hipCtxSynchronize[]
-#define INIT_hipCtxSynchronize_CB_ARGS_DATA(cb_data) { \
-};
-// hipSetDevice[('int', 'deviceId')]
-#define INIT_hipSetDevice_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipSetDevice.deviceId = (int)device; \
-};
-// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('int*', 'apiVersion')]
-#define INIT_hipCtxGetApiVersion_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxGetApiVersion.ctx = (hipCtx_t)ctx; \
- cb_data.args.hipCtxGetApiVersion.apiVersion = (int*)apiVersion; \
-};
-// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyFromSymbolAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyFromSymbolAsync.dst = (void*)dst; \
- cb_data.args.hipMemcpyFromSymbolAsync.symbol = (const void*)symbol; \
- cb_data.args.hipMemcpyFromSymbolAsync.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpyFromSymbolAsync.offset = (size_t)offset; \
- cb_data.args.hipMemcpyFromSymbolAsync.kind = (hipMemcpyKind)kind; \
- cb_data.args.hipMemcpyFromSymbolAsync.stream = (hipStream_t)stream; \
-};
-// hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')]
-#define INIT_hipExtGetLinkTypeAndHopCount_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipExtGetLinkTypeAndHopCount.device1 = (int)device1; \
- cb_data.args.hipExtGetLinkTypeAndHopCount.device2 = (int)device2; \
- cb_data.args.hipExtGetLinkTypeAndHopCount.linktype = (unsigned int*)linktype; \
- cb_data.args.hipExtGetLinkTypeAndHopCount.hopcount = (unsigned int*)hopcount; \
-};
-// __hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')]
-#define INIT___hipPopCallConfiguration_CB_ARGS_DATA(cb_data) { \
- cb_data.args.__hipPopCallConfiguration.gridDim = (dim3*)gridDim; \
- cb_data.args.__hipPopCallConfiguration.blockDim = (dim3*)blockDim; \
- cb_data.args.__hipPopCallConfiguration.sharedMem = (size_t*)sharedMem; \
- cb_data.args.__hipPopCallConfiguration.stream = (hipStream_t*)stream; \
-};
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')]
-#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f = (hipFunction_t)f; \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
-};
-// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')]
-#define INIT_hipMemset3D_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemset3D.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \
- cb_data.args.hipMemset3D.value = (int)value; \
- cb_data.args.hipMemset3D.extent = (hipExtent)extent; \
-};
-// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')]
-#define INIT_hipStreamCreateWithPriority_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamCreateWithPriority.stream = (hipStream_t*)stream; \
- cb_data.args.hipStreamCreateWithPriority.flags = (unsigned int)flags; \
- cb_data.args.hipStreamCreateWithPriority.priority = (int)priority; \
-};
-// hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy2DToArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy2DToArray.dst = (hipArray*)dst; \
- cb_data.args.hipMemcpy2DToArray.wOffset = (size_t)wOffset; \
- cb_data.args.hipMemcpy2DToArray.hOffset = (size_t)hOffset; \
- cb_data.args.hipMemcpy2DToArray.src = (const void*)src; \
- cb_data.args.hipMemcpy2DToArray.spitch = (size_t)spitch; \
- cb_data.args.hipMemcpy2DToArray.width = (size_t)width; \
- cb_data.args.hipMemcpy2DToArray.height = (size_t)height; \
- cb_data.args.hipMemcpy2DToArray.kind = (hipMemcpyKind)kind; \
-};
-// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetD8Async_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemsetD8Async.dest = (hipDeviceptr_t)dst; \
- cb_data.args.hipMemsetD8Async.value = (unsigned char)value; \
- cb_data.args.hipMemsetD8Async.count = (size_t)count; \
- cb_data.args.hipMemsetD8Async.stream = (hipStream_t)stream; \
-};
-// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
-#define INIT_hipCtxGetCacheConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \
-};
-// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')]
-#define INIT_hipModuleGetFunction_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleGetFunction.function = (hipFunction_t*)hfunc; \
- cb_data.args.hipModuleGetFunction.module = (hipModule_t)hmod; \
- cb_data.args.hipModuleGetFunction.kname = (name) ? strdup(name) : NULL; \
-};
-// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')]
-#define INIT_hipStreamWaitEvent_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamWaitEvent.stream = (hipStream_t)stream; \
- cb_data.args.hipStreamWaitEvent.event = (hipEvent_t)event; \
- cb_data.args.hipStreamWaitEvent.flags = (unsigned int)flags; \
-};
-// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')]
-#define INIT_hipDeviceGetStreamPriorityRange_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetStreamPriorityRange.leastPriority = (int*)leastPriority; \
- cb_data.args.hipDeviceGetStreamPriorityRange.greatestPriority = (int*)greatestPriority; \
-};
-// hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')]
-#define INIT_hipModuleLoad_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleLoad.module = (hipModule_t*)module; \
- cb_data.args.hipModuleLoad.fname = (fname) ? strdup(fname) : NULL; \
-};
-// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')]
-#define INIT_hipDevicePrimaryCtxSetFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDevicePrimaryCtxSetFlags.dev = (hipDevice_t)dev; \
- cb_data.args.hipDevicePrimaryCtxSetFlags.flags = (unsigned int)flags; \
-};
-// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipLaunchCooperativeKernel_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipLaunchCooperativeKernel.f = (const void*)f; \
- cb_data.args.hipLaunchCooperativeKernel.gridDim = (dim3)gridDim; \
- cb_data.args.hipLaunchCooperativeKernel.blockDimX = (dim3)blockDim; \
- cb_data.args.hipLaunchCooperativeKernel.kernelParams = (void**)kernelParams; \
- cb_data.args.hipLaunchCooperativeKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \
- cb_data.args.hipLaunchCooperativeKernel.stream = (hipStream_t)hStream; \
-};
-// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
-#define INIT_hipLaunchCooperativeKernelMultiDevice_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipLaunchCooperativeKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \
- cb_data.args.hipLaunchCooperativeKernelMultiDevice.numDevices = (int)numDevices; \
- cb_data.args.hipLaunchCooperativeKernelMultiDevice.flags = (unsigned int)flags; \
-};
-// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyAsync.dst = (void*)dst; \
- cb_data.args.hipMemcpyAsync.src = (const void*)src; \
- cb_data.args.hipMemcpyAsync.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpyAsync.kind = (hipMemcpyKind)kind; \
- cb_data.args.hipMemcpyAsync.stream = (hipStream_t)stream; \
-};
-// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')]
-#define INIT_hipMalloc3DArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMalloc3DArray.array = (hipArray_t*)array; \
- cb_data.args.hipMalloc3DArray.desc = (const hipChannelFormatDesc*)desc; \
- cb_data.args.hipMalloc3DArray.extent = (hipExtent)extent; \
- cb_data.args.hipMalloc3DArray.flags = (unsigned int)flags; \
-};
-// hipMallocHost[('void**', 'ptr'), ('size_t', 'size')]
-#define INIT_hipMallocHost_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMallocHost.ptr = (void**)ptr; \
- cb_data.args.hipMallocHost.size = (size_t)size; \
-};
-// hipCtxGetCurrent[('hipCtx_t*', 'ctx')]
-#define INIT_hipCtxGetCurrent_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxGetCurrent.ctx = (hipCtx_t*)ctx; \
-};
-// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')]
-#define INIT_hipDevicePrimaryCtxGetState_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDevicePrimaryCtxGetState.dev = (hipDevice_t)dev; \
- cb_data.args.hipDevicePrimaryCtxGetState.flags = (unsigned int*)flags; \
- cb_data.args.hipDevicePrimaryCtxGetState.active = (int*)active; \
-};
-// hipEventQuery[('hipEvent_t', 'event')]
-#define INIT_hipEventQuery_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipEventQuery.event = (hipEvent_t)event; \
-};
-// hipEventCreate[('hipEvent_t*', 'event')]
-#define INIT_hipEventCreate_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipEventCreate.event = (hipEvent_t*)event; \
-};
-// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')]
-#define INIT_hipMemGetAddressRange_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemGetAddressRange.pbase = (hipDeviceptr_t*)pbase; \
- cb_data.args.hipMemGetAddressRange.psize = (size_t*)psize; \
- cb_data.args.hipMemGetAddressRange.dptr = (hipDeviceptr_t)dptr; \
-};
-// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyFromSymbol_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyFromSymbol.dst = (void*)dst; \
- cb_data.args.hipMemcpyFromSymbol.symbol = (const void*)symbol; \
- cb_data.args.hipMemcpyFromSymbol.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpyFromSymbol.offset = (size_t)offset; \
- cb_data.args.hipMemcpyFromSymbol.kind = (hipMemcpyKind)kind; \
-};
-// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')]
-#define INIT_hipArrayCreate_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipArrayCreate.pHandle = (hipArray**)array; \
- cb_data.args.hipArrayCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \
-};
-// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('hipDeviceptr_t*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')]
-#define INIT_hipStreamAttachMemAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamAttachMemAsync.stream = (hipStream_t)stream; \
- cb_data.args.hipStreamAttachMemAsync.dev_ptr = (hipDeviceptr_t*)dev_ptr; \
- cb_data.args.hipStreamAttachMemAsync.length = (size_t)length; \
- cb_data.args.hipStreamAttachMemAsync.flags = (unsigned int)flags; \
-};
-// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')]
-#define INIT_hipStreamGetFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamGetFlags.stream = (hipStream_t)stream; \
- cb_data.args.hipStreamGetFlags.flags = (unsigned int*)flags; \
-};
-// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')]
-#define INIT_hipMallocArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMallocArray.array = (hipArray**)array; \
- cb_data.args.hipMallocArray.desc = (const hipChannelFormatDesc*)desc; \
- cb_data.args.hipMallocArray.width = (size_t)width; \
- cb_data.args.hipMallocArray.height = (size_t)height; \
- cb_data.args.hipMallocArray.flags = (unsigned int)flags; \
-};
-// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
-#define INIT_hipCtxGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \
-};
-// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')]
-#define INIT_hipDeviceDisablePeerAccess_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceDisablePeerAccess.peerDeviceId = (int)peerDeviceId; \
-};
-// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
-#define INIT_hipModuleOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.f = (hipFunction_t)f; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \
-};
-// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')]
-#define INIT_hipMemPtrGetInfo_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemPtrGetInfo.ptr = (void*)ptr; \
- cb_data.args.hipMemPtrGetInfo.size = (size_t*)size; \
-};
-// hipFuncGetAttribute[('int*', 'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')]
-#define INIT_hipFuncGetAttribute_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFuncGetAttribute.value = (int*)value; \
- cb_data.args.hipFuncGetAttribute.attrib = (hipFunction_attribute)attrib; \
- cb_data.args.hipFuncGetAttribute.hfunc = (hipFunction_t)hfunc; \
-};
-// hipCtxGetFlags[('unsigned int*', 'flags')]
-#define INIT_hipCtxGetFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxGetFlags.flags = (unsigned int*)flags; \
-};
-// hipStreamDestroy[('hipStream_t', 'stream')]
-#define INIT_hipStreamDestroy_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamDestroy.stream = (hipStream_t)stream; \
-};
-// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
-#define INIT___hipPushCallConfiguration_CB_ARGS_DATA(cb_data) { \
- cb_data.args.__hipPushCallConfiguration.gridDim = (dim3)gridDim; \
- cb_data.args.__hipPushCallConfiguration.blockDim = (dim3)blockDim; \
- cb_data.args.__hipPushCallConfiguration.sharedMem = (size_t)sharedMem; \
- cb_data.args.__hipPushCallConfiguration.stream = (hipStream_t)stream; \
-};
-// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')]
-#define INIT_hipMemset3DAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemset3DAsync.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \
- cb_data.args.hipMemset3DAsync.value = (int)value; \
- cb_data.args.hipMemset3DAsync.extent = (hipExtent)extent; \
- cb_data.args.hipMemset3DAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')]
-#define INIT_hipDeviceGetPCIBusId_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetPCIBusId.pciBusId = (char*)pciBusId; \
- cb_data.args.hipDeviceGetPCIBusId.len = (int)len; \
- cb_data.args.hipDeviceGetPCIBusId.device = (int)device; \
-};
-// hipInit[('unsigned int', 'flags')]
-#define INIT_hipInit_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipInit.flags = (unsigned int)flags; \
-};
-// hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')]
-#define INIT_hipMemcpyAtoH_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyAtoH.dst = (void*)dstHost; \
- cb_data.args.hipMemcpyAtoH.srcArray = (hipArray*)srcArray; \
- cb_data.args.hipMemcpyAtoH.srcOffset = (size_t)srcOffset; \
- cb_data.args.hipMemcpyAtoH.count = (size_t)ByteCount; \
-};
-// hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')]
-#define INIT_hipStreamGetPriority_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamGetPriority.stream = (hipStream_t)stream; \
- cb_data.args.hipStreamGetPriority.priority = (int*)priority; \
-};
-// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')]
-#define INIT_hipMemset2D_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemset2D.dst = (void*)dst; \
- cb_data.args.hipMemset2D.pitch = (size_t)pitch; \
- cb_data.args.hipMemset2D.value = (int)value; \
- cb_data.args.hipMemset2D.width = (size_t)width; \
- cb_data.args.hipMemset2D.height = (size_t)height; \
-};
-// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')]
-#define INIT_hipMemset2DAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemset2DAsync.dst = (void*)dst; \
- cb_data.args.hipMemset2DAsync.pitch = (size_t)pitch; \
- cb_data.args.hipMemset2DAsync.value = (int)value; \
- cb_data.args.hipMemset2DAsync.width = (size_t)width; \
- cb_data.args.hipMemset2DAsync.height = (size_t)height; \
- cb_data.args.hipMemset2DAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')]
-#define INIT_hipDeviceCanAccessPeer_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceCanAccessPeer.canAccessPeer = (int*)canAccess; \
- cb_data.args.hipDeviceCanAccessPeer.deviceId = (int)deviceId; \
- cb_data.args.hipDeviceCanAccessPeer.peerDeviceId = (int)peerDeviceId; \
-};
-// hipLaunchByPtr[('const void*', 'hostFunction')]
-#define INIT_hipLaunchByPtr_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipLaunchByPtr.hostFunction = (const void*)hostFunction; \
-};
-// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')]
-#define INIT_hipMemPrefetchAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemPrefetchAsync.dev_ptr = (const void*)dev_ptr; \
- cb_data.args.hipMemPrefetchAsync.count = (size_t)count; \
- cb_data.args.hipMemPrefetchAsync.device = (int)device; \
- cb_data.args.hipMemPrefetchAsync.stream = (hipStream_t)stream; \
-};
-// hipCtxDestroy[('hipCtx_t', 'ctx')]
-#define INIT_hipCtxDestroy_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxDestroy.ctx = (hipCtx_t)ctx; \
-};
-// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetD16Async_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemsetD16Async.dest = (hipDeviceptr_t)dst; \
- cb_data.args.hipMemsetD16Async.value = (unsigned short)value; \
- cb_data.args.hipMemsetD16Async.count = (size_t)count; \
- cb_data.args.hipMemsetD16Async.stream = (hipStream_t)stream; \
-};
-// hipModuleUnload[('hipModule_t', 'module')]
-#define INIT_hipModuleUnload_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleUnload.module = (hipModule_t)hmod; \
-};
-// hipHostUnregister[('void*', 'hostPtr')]
-#define INIT_hipHostUnregister_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHostUnregister.hostPtr = (void*)hostPtr; \
-};
-// hipProfilerStop[]
-#define INIT_hipProfilerStop_CB_ARGS_DATA(cb_data) { \
-};
-// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
-#define INIT_hipExtStreamCreateWithCUMask_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipExtStreamCreateWithCUMask.stream = (hipStream_t*)stream; \
- cb_data.args.hipExtStreamCreateWithCUMask.cuMaskSize = (unsigned int)cuMaskSize; \
- cb_data.args.hipExtStreamCreateWithCUMask.cuMask = (const unsigned int*)cuMask; \
-};
-// hipStreamSynchronize[('hipStream_t', 'stream')]
-#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \
-};
-// hipFreeHost[('void*', 'ptr')]
-#define INIT_hipFreeHost_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFreeHost.ptr = (void*)ptr; \
-};
-// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
-#define INIT_hipDeviceSetCacheConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \
-};
-// hipGetErrorName[]
-#define INIT_hipGetErrorName_CB_ARGS_DATA(cb_data) { \
-};
-// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyHtoD_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyHtoD.dst = (hipDeviceptr_t)dstDevice; \
- cb_data.args.hipMemcpyHtoD.src = (void*)srcHost; \
- cb_data.args.hipMemcpyHtoD.sizeBytes = (size_t)ByteCount; \
-};
-// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
-#define INIT_hipModuleGetGlobal_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleGetGlobal.dptr = (hipDeviceptr_t*)dptr; \
- cb_data.args.hipModuleGetGlobal.bytes = (size_t*)bytes; \
- cb_data.args.hipModuleGetGlobal.hmod = (hipModule_t)hmod; \
- cb_data.args.hipModuleGetGlobal.name = (name) ? strdup(name) : NULL; \
-};
-// hipMemcpyHtoA[('hipArray*', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')]
-#define INIT_hipMemcpyHtoA_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyHtoA.dstArray = (hipArray*)dstArray; \
- cb_data.args.hipMemcpyHtoA.dstOffset = (size_t)dstOffset; \
- cb_data.args.hipMemcpyHtoA.srcHost = (const void*)srcHost; \
- cb_data.args.hipMemcpyHtoA.count = (size_t)ByteCount; \
-};
-// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')]
-#define INIT_hipCtxCreate_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxCreate.ctx = (hipCtx_t*)ctx; \
- cb_data.args.hipCtxCreate.flags = (unsigned int)flags; \
- cb_data.args.hipCtxCreate.device = (hipDevice_t)device; \
-};
-// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy2D_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy2D.dst = (void*)dst; \
- cb_data.args.hipMemcpy2D.dpitch = (size_t)dpitch; \
- cb_data.args.hipMemcpy2D.src = (const void*)src; \
- cb_data.args.hipMemcpy2D.spitch = (size_t)spitch; \
- cb_data.args.hipMemcpy2D.width = (size_t)width; \
- cb_data.args.hipMemcpy2D.height = (size_t)height; \
- cb_data.args.hipMemcpy2D.kind = (hipMemcpyKind)kind; \
-};
-// hipIpcCloseMemHandle[('void*', 'devPtr')]
-#define INIT_hipIpcCloseMemHandle_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipIpcCloseMemHandle.devPtr = (void*)dev_ptr; \
-};
-// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
-#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipChooseDevice.device = (int*)device; \
- cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_t*)properties; \
-};
-// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')]
-#define INIT_hipDeviceSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceSetSharedMemConfig.config = (hipSharedMemConfig)config; \
-};
-// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')]
-#define INIT_hipMallocMipmappedArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMallocMipmappedArray.mipmappedArray = (hipMipmappedArray_t*)mipmappedArray; \
- cb_data.args.hipMallocMipmappedArray.desc = (const hipChannelFormatDesc*)desc; \
- cb_data.args.hipMallocMipmappedArray.extent = (hipExtent)extent; \
- cb_data.args.hipMallocMipmappedArray.numLevels = (unsigned int)numLevels; \
- cb_data.args.hipMallocMipmappedArray.flags = (unsigned int)flags; \
-};
-// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')]
-#define INIT_hipSetupArgument_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipSetupArgument.arg = (const void*)arg; \
- cb_data.args.hipSetupArgument.size = (size_t)size; \
- cb_data.args.hipSetupArgument.offset = (size_t)offset; \
-};
-// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')]
-#define INIT_hipIpcGetEventHandle_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipIpcGetEventHandle.handle = (hipIpcEventHandle_t*)handle; \
- cb_data.args.hipIpcGetEventHandle.event = (hipEvent_t)event; \
-};
-// hipFreeArray[('hipArray*', 'array')]
-#define INIT_hipFreeArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFreeArray.array = (hipArray*)array; \
-};
-// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
-#define INIT_hipCtxSetCacheConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \
-};
-// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')]
-#define INIT_hipFuncSetCacheConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFuncSetCacheConfig.func = (const void*)func; \
- cb_data.args.hipFuncSetCacheConfig.config = (hipFuncCache_t)cacheConfig; \
-};
-// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipLaunchKernel_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipLaunchKernel.function_address = (const void*)hostFunction; \
- cb_data.args.hipLaunchKernel.numBlocks = (dim3)gridDim; \
- cb_data.args.hipLaunchKernel.dimBlocks = (dim3)blockDim; \
- cb_data.args.hipLaunchKernel.args = (void**)args; \
- cb_data.args.hipLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
- cb_data.args.hipLaunchKernel.stream = (hipStream_t)stream; \
-};
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), ('unsigned int', 'flags')]
-#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (hipFunction_t)f; \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
- cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \
-};
-// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
-#define INIT_hipModuleGetTexRef_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleGetTexRef.texRef = (textureReference**)texRef; \
- cb_data.args.hipModuleGetTexRef.hmod = (hipModule_t)hmod; \
- cb_data.args.hipModuleGetTexRef.name = (name) ? strdup(name) : NULL; \
-};
-// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')]
-#define INIT_hipFuncSetAttribute_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFuncSetAttribute.func = (const void*)func; \
- cb_data.args.hipFuncSetAttribute.attr = (hipFuncAttribute)attr; \
- cb_data.args.hipFuncSetAttribute.value = (int)value; \
-};
-// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')]
-#define INIT_hipEventElapsedTime_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipEventElapsedTime.ms = (float*)ms; \
- cb_data.args.hipEventElapsedTime.start = (hipEvent_t)start; \
- cb_data.args.hipEventElapsedTime.stop = (hipEvent_t)stop; \
-};
-// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
-#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipConfigureCall.gridDim = (dim3)gridDim; \
- cb_data.args.hipConfigureCall.blockDim = (dim3)blockDim; \
- cb_data.args.hipConfigureCall.sharedMem = (size_t)sharedMem; \
- cb_data.args.hipConfigureCall.stream = (hipStream_t)stream; \
-};
-// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')]
-#define INIT_hipMemAdvise_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemAdvise.dev_ptr = (const void*)dev_ptr; \
- cb_data.args.hipMemAdvise.count = (size_t)count; \
- cb_data.args.hipMemAdvise.advice = (hipMemoryAdvise)advice; \
- cb_data.args.hipMemAdvise.device = (int)device; \
-};
-// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy3DAsync.p = (const hipMemcpy3DParms*)p; \
- cb_data.args.hipMemcpy3DAsync.stream = (hipStream_t)stream; \
-};
-// hipEventDestroy[('hipEvent_t', 'event')]
-#define INIT_hipEventDestroy_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipEventDestroy.event = (hipEvent_t)event; \
-};
-// hipCtxPopCurrent[('hipCtx_t*', 'ctx')]
-#define INIT_hipCtxPopCurrent_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxPopCurrent.ctx = (hipCtx_t*)ctx; \
-};
-// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')]
-#define INIT_hipGetSymbolAddress_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipGetSymbolAddress.devPtr = (void**)devPtr; \
- cb_data.args.hipGetSymbolAddress.symbol = (const void*)symbol; \
-};
-// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 'hostPtr')]
-#define INIT_hipHostGetFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHostGetFlags.flagsPtr = (unsigned int*)flagsPtr; \
- cb_data.args.hipHostGetFlags.hostPtr = (void*)hostPtr; \
-};
-// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-#define INIT_hipHostMalloc_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHostMalloc.ptr = (void**)ptr; \
- cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \
- cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \
-};
-// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')]
-#define INIT_hipCtxSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxSetSharedMemConfig.config = (hipSharedMemConfig)config; \
-};
-// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')]
-#define INIT_hipFreeMipmappedArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFreeMipmappedArray.mipmappedArray = (hipMipmappedArray_t)mipmappedArray; \
-};
-// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')]
-#define INIT_hipMemGetInfo_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemGetInfo.free = (size_t*)free; \
- cb_data.args.hipMemGetInfo.total = (size_t*)total; \
-};
-// hipDeviceReset[]
-#define INIT_hipDeviceReset_CB_ARGS_DATA(cb_data) { \
-};
-// hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemset_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemset.dst = (void*)dst; \
- cb_data.args.hipMemset.value = (int)value; \
- cb_data.args.hipMemset.sizeBytes = (size_t)sizeBytes; \
-};
-// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')]
-#define INIT_hipMemsetD8_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemsetD8.dest = (hipDeviceptr_t)dst; \
- cb_data.args.hipMemsetD8.value = (unsigned char)value; \
- cb_data.args.hipMemsetD8.count = (size_t)count; \
-};
-// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyParam2DAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyParam2DAsync.pCopy = (const hip_Memcpy2D*)pCopy; \
- cb_data.args.hipMemcpyParam2DAsync.stream = (hipStream_t)stream; \
-};
-// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
-#define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \
- cb_data.args.hipHostRegister.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipHostRegister.flags = (unsigned int)flags; \
-};
-// hipDriverGetVersion[('int*', 'driverVersion')]
-#define INIT_hipDriverGetVersion_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDriverGetVersion.driverVersion = (int*)driverVersion; \
-};
-// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')]
-#define INIT_hipArray3DCreate_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipArray3DCreate.array = (hipArray**)array; \
- cb_data.args.hipArray3DCreate.pAllocateArray = (const HIP_ARRAY3D_DESCRIPTOR*)pAllocateArray; \
-};
-// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')]
-#define INIT_hipIpcOpenMemHandle_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipIpcOpenMemHandle.devPtr = (void**)dev_ptr; \
- cb_data.args.hipIpcOpenMemHandle.handle = (hipIpcMemHandle_t)handle; \
- cb_data.args.hipIpcOpenMemHandle.flags = (unsigned int)flags; \
-};
-// hipGetLastError[]
-#define INIT_hipGetLastError_CB_ARGS_DATA(cb_data) { \
-};
-// hipGetDeviceFlags[('unsigned int*', 'flags')]
-#define INIT_hipGetDeviceFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipGetDeviceFlags.flags = (unsigned int*)flags; \
-};
-// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
-#define INIT_hipDeviceGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \
-};
-// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')]
-#define INIT_hipDrvMemcpy3D_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDrvMemcpy3D.pCopy = (const HIP_MEMCPY3D*)pCopy; \
-};
-// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy2DFromArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy2DFromArray.dst = (void*)dst; \
- cb_data.args.hipMemcpy2DFromArray.dpitch = (size_t)dpitch; \
- cb_data.args.hipMemcpy2DFromArray.src = (hipArray_const_t)src; \
- cb_data.args.hipMemcpy2DFromArray.wOffset = (size_t)wOffsetSrc; \
- cb_data.args.hipMemcpy2DFromArray.hOffset = (size_t)hOffset; \
- cb_data.args.hipMemcpy2DFromArray.width = (size_t)width; \
- cb_data.args.hipMemcpy2DFromArray.height = (size_t)height; \
- cb_data.args.hipMemcpy2DFromArray.kind = (hipMemcpyKind)kind; \
-};
-// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')]
-#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (const void*)f; \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize = (size_t)dynamicSMemSize; \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \
-};
-// hipSetDeviceFlags[('unsigned int', 'flags')]
-#define INIT_hipSetDeviceFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipSetDeviceFlags.flags = (unsigned int)flags; \
-};
-// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent')]
-#define INIT_hipHccModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHccModuleLaunchKernel.f = (hipFunction_t)f; \
- cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \
- cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \
- cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \
- cb_data.args.hipHccModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \
- cb_data.args.hipHccModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \
- cb_data.args.hipHccModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \
- cb_data.args.hipHccModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
- cb_data.args.hipHccModuleLaunchKernel.hStream = (hipStream_t)hStream; \
- cb_data.args.hipHccModuleLaunchKernel.kernelParams = (void**)kernelParams; \
- cb_data.args.hipHccModuleLaunchKernel.extra = (void**)extra; \
- cb_data.args.hipHccModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \
- cb_data.args.hipHccModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
-};
-// hipFree[('void*', 'ptr')]
-#define INIT_hipFree_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFree.ptr = (void*)ptr; \
-};
-// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
-#define INIT_hipOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \
- cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \
- cb_data.args.hipOccupancyMaxPotentialBlockSize.f = (const void*)f; \
- cb_data.args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
- cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \
-};
-// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')]
-#define INIT_hipDeviceGetAttribute_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetAttribute.pi = (int*)pi; \
- cb_data.args.hipDeviceGetAttribute.attr = (hipDeviceAttribute_t)attr; \
- cb_data.args.hipDeviceGetAttribute.deviceId = (int)device; \
-};
-// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')]
-#define INIT_hipDeviceComputeCapability_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceComputeCapability.major = (int*)major; \
- cb_data.args.hipDeviceComputeCapability.minor = (int*)minor; \
- cb_data.args.hipDeviceComputeCapability.device = (hipDevice_t)device; \
-};
-// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')]
-#define INIT_hipCtxDisablePeerAccess_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxDisablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \
-};
-// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-#define INIT_hipMallocManaged_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMallocManaged.dev_ptr = (void**)dev_ptr; \
- cb_data.args.hipMallocManaged.size = (size_t)size; \
- cb_data.args.hipMallocManaged.flags = (unsigned int)flags; \
-};
-// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')]
-#define INIT_hipDeviceGetByPCIBusId_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetByPCIBusId.device = (int*)device; \
- cb_data.args.hipDeviceGetByPCIBusId.pciBusId = (pciBusIdstr) ? strdup(pciBusIdstr) : NULL; \
-};
-// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')]
-#define INIT_hipIpcGetMemHandle_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipIpcGetMemHandle.handle = (hipIpcMemHandle_t*)handle; \
- cb_data.args.hipIpcGetMemHandle.devPtr = (void*)dev_ptr; \
-};
-// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyHtoDAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyHtoDAsync.dst = (hipDeviceptr_t)dstDevice; \
- cb_data.args.hipMemcpyHtoDAsync.src = (void*)srcHost; \
- cb_data.args.hipMemcpyHtoDAsync.sizeBytes = (size_t)ByteCount; \
- cb_data.args.hipMemcpyHtoDAsync.stream = (hipStream_t)stream; \
-};
-// hipCtxGetDevice[('hipDevice_t*', 'device')]
-#define INIT_hipCtxGetDevice_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxGetDevice.device = (hipDevice_t*)device; \
-};
-// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyDtoD_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyDtoD.dst = (hipDeviceptr_t)dstDevice; \
- cb_data.args.hipMemcpyDtoD.src = (hipDeviceptr_t)srcDevice; \
- cb_data.args.hipMemcpyDtoD.sizeBytes = (size_t)ByteCount; \
-};
-// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')]
-#define INIT_hipModuleLoadData_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleLoadData.module = (hipModule_t*)module; \
- cb_data.args.hipModuleLoadData.image = (const void*)image; \
-};
-// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')]
-#define INIT_hipDevicePrimaryCtxRelease_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDevicePrimaryCtxRelease.dev = (hipDevice_t)dev; \
-};
-// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')]
-#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f = (const void*)f; \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \
- cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize = (size_t)dynamicSMemSize; \
-};
-// hipCtxSetCurrent[('hipCtx_t', 'ctx')]
-#define INIT_hipCtxSetCurrent_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxSetCurrent.ctx = (hipCtx_t)ctx; \
-};
-// hipGetErrorString[]
-#define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) { \
-};
-// hipStreamCreate[('hipStream_t*', 'stream')]
-#define INIT_hipStreamCreate_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamCreate.stream = (hipStream_t*)stream; \
-};
-// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')]
-#define INIT_hipDevicePrimaryCtxRetain_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDevicePrimaryCtxRetain.pctx = (hipCtx_t*)pctx; \
- cb_data.args.hipDevicePrimaryCtxRetain.dev = (hipDevice_t)dev; \
-};
-// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')]
-#define INIT_hipDeviceGet_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGet.device = (hipDevice_t*)device; \
- cb_data.args.hipDeviceGet.ordinal = (int)deviceId; \
-};
-// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')]
-#define INIT_hipStreamCreateWithFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamCreateWithFlags.stream = (hipStream_t*)stream; \
- cb_data.args.hipStreamCreateWithFlags.flags = (unsigned int)flags; \
-};
-// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyFromArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyFromArray.dst = (void*)dst; \
- cb_data.args.hipMemcpyFromArray.srcArray = (hipArray_const_t)src; \
- cb_data.args.hipMemcpyFromArray.wOffset = (size_t)wOffsetSrc; \
- cb_data.args.hipMemcpyFromArray.hOffset = (size_t)hOffset; \
- cb_data.args.hipMemcpyFromArray.count = (size_t)count; \
- cb_data.args.hipMemcpyFromArray.kind = (hipMemcpyKind)kind; \
-};
-// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpy2DAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy2DAsync.dst = (void*)dst; \
- cb_data.args.hipMemcpy2DAsync.dpitch = (size_t)dpitch; \
- cb_data.args.hipMemcpy2DAsync.src = (const void*)src; \
- cb_data.args.hipMemcpy2DAsync.spitch = (size_t)spitch; \
- cb_data.args.hipMemcpy2DAsync.width = (size_t)width; \
- cb_data.args.hipMemcpy2DAsync.height = (size_t)height; \
- cb_data.args.hipMemcpy2DAsync.kind = (hipMemcpyKind)kind; \
- cb_data.args.hipMemcpy2DAsync.stream = (hipStream_t)stream; \
-};
-// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')]
-#define INIT_hipFuncGetAttributes_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipFuncGetAttributes.attr = (hipFuncAttributes*)attr; \
- cb_data.args.hipFuncGetAttributes.func = (const void*)func; \
-};
-// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')]
-#define INIT_hipGetSymbolSize_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipGetSymbolSize.size = (size_t*)sizePtr; \
- cb_data.args.hipGetSymbolSize.symbol = (const void*)symbol; \
-};
-// hipHostFree[('void*', 'ptr')]
-#define INIT_hipHostFree_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHostFree.ptr = (void*)ptr; \
-};
-// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')]
-#define INIT_hipEventCreateWithFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipEventCreateWithFlags.event = (hipEvent_t*)event; \
- cb_data.args.hipEventCreateWithFlags.flags = (unsigned int)flags; \
-};
-// hipStreamQuery[('hipStream_t', 'stream')]
-#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \
-};
-// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')]
-#define INIT_hipMemcpy3D_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy3D.p = (const hipMemcpy3DParms*)p; \
-};
-// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyToSymbol_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyToSymbol.symbol = (const void*)symbol; \
- cb_data.args.hipMemcpyToSymbol.src = (const void*)src; \
- cb_data.args.hipMemcpyToSymbol.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpyToSymbol.offset = (size_t)offset; \
- cb_data.args.hipMemcpyToSymbol.kind = (hipMemcpyKind)kind; \
-};
-// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy.dst = (void*)dst; \
- cb_data.args.hipMemcpy.src = (const void*)src; \
- cb_data.args.hipMemcpy.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpy.kind = (hipMemcpyKind)kind; \
-};
-// hipPeekAtLastError[]
-#define INIT_hipPeekAtLastError_CB_ARGS_DATA(cb_data) { \
-};
-// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
-#define INIT_hipExtLaunchMultiKernelMultiDevice_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipExtLaunchMultiKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \
- cb_data.args.hipExtLaunchMultiKernelMultiDevice.numDevices = (int)numDevices; \
- cb_data.args.hipExtLaunchMultiKernelMultiDevice.flags = (unsigned int)flags; \
-};
-// hipHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-#define INIT_hipHostAlloc_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHostAlloc.ptr = (void**)ptr; \
- cb_data.args.hipHostAlloc.size = (size_t)sizeBytes; \
- cb_data.args.hipHostAlloc.flags = (unsigned int)flags; \
-};
-// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')]
-#define INIT_hipStreamAddCallback_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamAddCallback.stream = (hipStream_t)stream; \
- cb_data.args.hipStreamAddCallback.callback = (hipStreamCallback_t)callback; \
- cb_data.args.hipStreamAddCallback.userData = (void*)userData; \
- cb_data.args.hipStreamAddCallback.flags = (unsigned int)flags; \
-};
-// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyToArray_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyToArray.dst = (hipArray*)dst; \
- cb_data.args.hipMemcpyToArray.wOffset = (size_t)wOffset; \
- cb_data.args.hipMemcpyToArray.hOffset = (size_t)hOffset; \
- cb_data.args.hipMemcpyToArray.src = (const void*)src; \
- cb_data.args.hipMemcpyToArray.count = (size_t)count; \
- cb_data.args.hipMemcpyToArray.kind = (hipMemcpyKind)kind; \
-};
-// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')]
-#define INIT_hipMemsetD32_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemsetD32.dest = (hipDeviceptr_t)dst; \
- cb_data.args.hipMemsetD32.value = (int)value; \
- cb_data.args.hipMemsetD32.count = (size_t)count; \
-};
-// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')]
-#define INIT_hipExtModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipExtModuleLaunchKernel.f = (hipFunction_t)f; \
- cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \
- cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \
- cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \
- cb_data.args.hipExtModuleLaunchKernel.localWorkSizeX = (unsigned int)localWorkSizeX; \
- cb_data.args.hipExtModuleLaunchKernel.localWorkSizeY = (unsigned int)localWorkSizeY; \
- cb_data.args.hipExtModuleLaunchKernel.localWorkSizeZ = (unsigned int)localWorkSizeZ; \
- cb_data.args.hipExtModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
- cb_data.args.hipExtModuleLaunchKernel.hStream = (hipStream_t)hStream; \
- cb_data.args.hipExtModuleLaunchKernel.kernelParams = (void**)kernelParams; \
- cb_data.args.hipExtModuleLaunchKernel.extra = (void**)extra; \
- cb_data.args.hipExtModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \
- cb_data.args.hipExtModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
- cb_data.args.hipExtModuleLaunchKernel.flags = (unsigned int)flags; \
-};
-// hipDeviceSynchronize[]
-#define INIT_hipDeviceSynchronize_CB_ARGS_DATA(cb_data) { \
-};
-// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
-#define INIT_hipDeviceGetCacheConfig_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \
-};
-// hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')]
-#define INIT_hipMalloc3D_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMalloc3D.pitchedDevPtr = (hipPitchedPtr*)pitchedDevPtr; \
- cb_data.args.hipMalloc3D.extent = (hipExtent)extent; \
-};
-// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')]
-#define INIT_hipPointerGetAttributes_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipPointerGetAttributes.attributes = (hipPointerAttribute_t*)attributes; \
- cb_data.args.hipPointerGetAttributes.ptr = (const void*)ptr; \
-};
-// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemsetAsync.dst = (void*)dst; \
- cb_data.args.hipMemsetAsync.value = (int)value; \
- cb_data.args.hipMemsetAsync.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemsetAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')]
-#define INIT_hipDeviceGetName_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetName.name = (char*)name; \
- cb_data.args.hipDeviceGetName.len = (int)len; \
- cb_data.args.hipDeviceGetName.device = (hipDevice_t)device; \
-};
-// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')]
-#define INIT_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize = (int*)gridSize; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize = (int*)blockSize; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f = (hipFunction_t)f; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit = (int)blockSizeLimit; \
- cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags = (unsigned int)flags; \
-};
-// hipCtxPushCurrent[('hipCtx_t', 'ctx')]
-#define INIT_hipCtxPushCurrent_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxPushCurrent.ctx = (hipCtx_t)ctx; \
-};
-// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyPeer_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyPeer.dst = (void*)dst; \
- cb_data.args.hipMemcpyPeer.dstDeviceId = (int)dstDevice; \
- cb_data.args.hipMemcpyPeer.src = (const void*)src; \
- cb_data.args.hipMemcpyPeer.srcDeviceId = (int)srcDevice; \
- cb_data.args.hipMemcpyPeer.sizeBytes = (size_t)sizeBytes; \
-};
-// hipEventSynchronize[('hipEvent_t', 'event')]
-#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipEventSynchronize.event = (hipEvent_t)event; \
-};
-// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyDtoDAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyDtoDAsync.dst = (hipDeviceptr_t)dstDevice; \
- cb_data.args.hipMemcpyDtoDAsync.src = (hipDeviceptr_t)srcDevice; \
- cb_data.args.hipMemcpyDtoDAsync.sizeBytes = (size_t)ByteCount; \
- cb_data.args.hipMemcpyDtoDAsync.stream = (hipStream_t)stream; \
-};
-// hipProfilerStart[]
-#define INIT_hipProfilerStart_CB_ARGS_DATA(cb_data) { \
-};
-// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
-#define INIT_hipExtMallocWithFlags_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipExtMallocWithFlags.ptr = (void**)ptr; \
- cb_data.args.hipExtMallocWithFlags.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipExtMallocWithFlags.flags = (unsigned int)flags; \
-};
-// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')]
-#define INIT_hipCtxEnablePeerAccess_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipCtxEnablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \
- cb_data.args.hipCtxEnablePeerAccess.flags = (unsigned int)flags; \
-};
-// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')]
-#define INIT_hipMemAllocHost_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemAllocHost.ptr = (void**)ptr; \
- cb_data.args.hipMemAllocHost.size = (size_t)size; \
-};
-// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyDtoHAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyDtoHAsync.dst = (void*)dstHost; \
- cb_data.args.hipMemcpyDtoHAsync.src = (hipDeviceptr_t)srcDevice; \
- cb_data.args.hipMemcpyDtoHAsync.sizeBytes = (size_t)ByteCount; \
- cb_data.args.hipMemcpyDtoHAsync.stream = (hipStream_t)stream; \
-};
-// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')]
-#define INIT_hipModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \
- cb_data.args.hipModuleLaunchKernel.gridDimX = (unsigned int)gridDimX; \
- cb_data.args.hipModuleLaunchKernel.gridDimY = (unsigned int)gridDimY; \
- cb_data.args.hipModuleLaunchKernel.gridDimZ = (unsigned int)gridDimZ; \
- cb_data.args.hipModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \
- cb_data.args.hipModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \
- cb_data.args.hipModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \
- cb_data.args.hipModuleLaunchKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \
- cb_data.args.hipModuleLaunchKernel.stream = (hipStream_t)hStream; \
- cb_data.args.hipModuleLaunchKernel.kernelParams = (void**)kernelParams; \
- cb_data.args.hipModuleLaunchKernel.extra = (void**)extra; \
-};
-// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')]
-#define INIT_hipMemAllocPitch_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemAllocPitch.dptr = (hipDeviceptr_t*)dptr; \
- cb_data.args.hipMemAllocPitch.pitch = (size_t*)pitch; \
- cb_data.args.hipMemAllocPitch.widthInBytes = (size_t)widthInBytes; \
- cb_data.args.hipMemAllocPitch.height = (size_t)height; \
- cb_data.args.hipMemAllocPitch.elementSizeBytes = (unsigned int)elementSizeBytes; \
-};
-// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')]
-#define INIT_hipExtLaunchKernel_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipExtLaunchKernel.function_address = (const void*)hostFunction; \
- cb_data.args.hipExtLaunchKernel.numBlocks = (dim3)gridDim; \
- cb_data.args.hipExtLaunchKernel.dimBlocks = (dim3)blockDim; \
- cb_data.args.hipExtLaunchKernel.args = (void**)args; \
- cb_data.args.hipExtLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
- cb_data.args.hipExtLaunchKernel.stream = (hipStream_t)stream; \
- cb_data.args.hipExtLaunchKernel.startEvent = (hipEvent_t)startEvent; \
- cb_data.args.hipExtLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
- cb_data.args.hipExtLaunchKernel.flags = (int)flags; \
-};
-// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpy2DFromArrayAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpy2DFromArrayAsync.dst = (void*)dst; \
- cb_data.args.hipMemcpy2DFromArrayAsync.dpitch = (size_t)dpitch; \
- cb_data.args.hipMemcpy2DFromArrayAsync.src = (hipArray_const_t)src; \
- cb_data.args.hipMemcpy2DFromArrayAsync.wOffset = (size_t)wOffsetSrc; \
- cb_data.args.hipMemcpy2DFromArrayAsync.hOffset = (size_t)hOffsetSrc; \
- cb_data.args.hipMemcpy2DFromArrayAsync.width = (size_t)width; \
- cb_data.args.hipMemcpy2DFromArrayAsync.height = (size_t)height; \
- cb_data.args.hipMemcpy2DFromArrayAsync.kind = (hipMemcpyKind)kind; \
- cb_data.args.hipMemcpy2DFromArrayAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')]
-#define INIT_hipDeviceGetLimit_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetLimit.pValue = (size_t*)pValue; \
- cb_data.args.hipDeviceGetLimit.limit = (hipLimit_t)limit; \
-};
-// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')]
-#define INIT_hipModuleLoadDataEx_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipModuleLoadDataEx.module = (hipModule_t*)module; \
- cb_data.args.hipModuleLoadDataEx.image = (const void*)image; \
- cb_data.args.hipModuleLoadDataEx.numOptions = (unsigned int)numOptions; \
- cb_data.args.hipModuleLoadDataEx.options = (hipJitOption*)options; \
- cb_data.args.hipModuleLoadDataEx.optionsValues = (void**)optionsValues; \
-};
-// hipRuntimeGetVersion[('int*', 'runtimeVersion')]
-#define INIT_hipRuntimeGetVersion_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipRuntimeGetVersion.runtimeVersion = (int*)runtimeVersion; \
-};
-// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
-#define INIT_hipMemRangeGetAttribute_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemRangeGetAttribute.data = (void*)data; \
- cb_data.args.hipMemRangeGetAttribute.data_size = (size_t)data_size; \
- cb_data.args.hipMemRangeGetAttribute.attribute = (hipMemRangeAttribute)attribute; \
- cb_data.args.hipMemRangeGetAttribute.dev_ptr = (const void*)dev_ptr; \
- cb_data.args.hipMemRangeGetAttribute.count = (size_t)count; \
-};
-// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')]
-#define INIT_hipDeviceGetP2PAttribute_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceGetP2PAttribute.value = (int*)value; \
- cb_data.args.hipDeviceGetP2PAttribute.attr = (hipDeviceP2PAttr)attr; \
- cb_data.args.hipDeviceGetP2PAttribute.srcDevice = (int)srcDevice; \
- cb_data.args.hipDeviceGetP2PAttribute.dstDevice = (int)dstDevice; \
-};
-// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyPeerAsync_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyPeerAsync.dst = (void*)dst; \
- cb_data.args.hipMemcpyPeerAsync.dstDeviceId = (int)dstDevice; \
- cb_data.args.hipMemcpyPeerAsync.src = (const void*)src; \
- cb_data.args.hipMemcpyPeerAsync.srcDevice = (int)srcDevice; \
- cb_data.args.hipMemcpyPeerAsync.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpyPeerAsync.stream = (hipStream_t)stream; \
-};
-// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
-#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipGetDeviceProperties.props = (hipDeviceProp_t*)props; \
- cb_data.args.hipGetDeviceProperties.device = (hipDevice_t)device; \
-};
-// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyDtoH_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyDtoH.dst = (void*)dstHost; \
- cb_data.args.hipMemcpyDtoH.src = (hipDeviceptr_t)srcDevice; \
- cb_data.args.hipMemcpyDtoH.sizeBytes = (size_t)ByteCount; \
-};
-// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyWithStream_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyWithStream.dst = (void*)dst; \
- cb_data.args.hipMemcpyWithStream.src = (const void*)src; \
- cb_data.args.hipMemcpyWithStream.sizeBytes = (size_t)sizeBytes; \
- cb_data.args.hipMemcpyWithStream.kind = (hipMemcpyKind)kind; \
- cb_data.args.hipMemcpyWithStream.stream = (hipStream_t)stream; \
-};
-// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')]
-#define INIT_hipDeviceTotalMem_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDeviceTotalMem.bytes = (size_t*)bytes; \
- cb_data.args.hipDeviceTotalMem.device = (hipDevice_t)device; \
-};
-// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')]
-#define INIT_hipHostGetDevicePointer_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipHostGetDevicePointer.devPtr = (void**)devicePointer; \
- cb_data.args.hipHostGetDevicePointer.hstPtr = (void*)hostPointer; \
- cb_data.args.hipHostGetDevicePointer.flags = (unsigned int)flags; \
-};
-// hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
-#define INIT_hipMemRangeGetAttributes_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemRangeGetAttributes.data = (void**)data; \
- cb_data.args.hipMemRangeGetAttributes.data_sizes = (size_t*)data_sizes; \
- cb_data.args.hipMemRangeGetAttributes.attributes = (hipMemRangeAttribute*)attributes; \
- cb_data.args.hipMemRangeGetAttributes.num_attributes = (size_t)num_attributes; \
- cb_data.args.hipMemRangeGetAttributes.dev_ptr = (const void*)dev_ptr; \
- cb_data.args.hipMemRangeGetAttributes.count = (size_t)count; \
-};
-// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')]
-#define INIT_hipMemcpyParam2D_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemcpyParam2D.pCopy = (const hip_Memcpy2D*)pCopy; \
-};
-// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')]
-#define INIT_hipDevicePrimaryCtxReset_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipDevicePrimaryCtxReset.dev = (hipDevice_t)dev; \
-};
-// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')]
-#define INIT_hipGetMipmappedArrayLevel_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipGetMipmappedArrayLevel.levelArray = (hipArray_t*)levelArray; \
- cb_data.args.hipGetMipmappedArrayLevel.mipmappedArray = (hipMipmappedArray_const_t)mipmappedArray; \
- cb_data.args.hipGetMipmappedArrayLevel.level = (unsigned int)level; \
-};
-// hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetD32Async_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipMemsetD32Async.dst = (hipDeviceptr_t)dst; \
- cb_data.args.hipMemsetD32Async.value = (int)value; \
- cb_data.args.hipMemsetD32Async.count = (size_t)count; \
- cb_data.args.hipMemsetD32Async.stream = (hipStream_t)stream; \
-};
-// hipGetDevice[('int*', 'deviceId')]
-#define INIT_hipGetDevice_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipGetDevice.deviceId = (int*)deviceId; \
-};
-// hipGetDeviceCount[('int*', 'count')]
-#define INIT_hipGetDeviceCount_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipGetDeviceCount.count = (int*)count; \
-};
-// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')]
-#define INIT_hipIpcOpenEventHandle_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipIpcOpenEventHandle.event = (hipEvent_t*)event; \
- cb_data.args.hipIpcOpenEventHandle.handle = (hipIpcEventHandle_t)handle; \
-};
-#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)
-#if HIP_PROF_HIP_API_STRING
-
-// HIP API args filling method
-static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
- switch (id) {
-// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipDrvMemcpy3DAsync:
- if (data->args.hipDrvMemcpy3DAsync.pCopy) data->args.hipDrvMemcpy3DAsync.pCopy__val = *(data->args.hipDrvMemcpy3DAsync.pCopy);
- break;
-// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipDeviceEnablePeerAccess:
- break;
-// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')]
- case HIP_API_ID_hipFuncSetSharedMemConfig:
- break;
-// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyToSymbolAsync:
- break;
-// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')]
- case HIP_API_ID_hipMallocPitch:
- if (data->args.hipMallocPitch.ptr) data->args.hipMallocPitch.ptr__val = *(data->args.hipMallocPitch.ptr);
- if (data->args.hipMallocPitch.pitch) data->args.hipMallocPitch.pitch__val = *(data->args.hipMallocPitch.pitch);
- break;
-// hipMalloc[('void**', 'ptr'), ('size_t', 'size')]
- case HIP_API_ID_hipMalloc:
- if (data->args.hipMalloc.ptr) data->args.hipMalloc.ptr__val = *(data->args.hipMalloc.ptr);
- break;
-// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')]
- case HIP_API_ID_hipMemsetD16:
- break;
-// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')]
- case HIP_API_ID_hipExtStreamGetCUMask:
- if (data->args.hipExtStreamGetCUMask.cuMask) data->args.hipExtStreamGetCUMask.cuMask__val = *(data->args.hipExtStreamGetCUMask.cuMask);
- break;
-// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipEventRecord:
- break;
-// hipCtxSynchronize[]
- case HIP_API_ID_hipCtxSynchronize:
- break;
-// hipSetDevice[('int', 'deviceId')]
- case HIP_API_ID_hipSetDevice:
- break;
-// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('int*', 'apiVersion')]
- case HIP_API_ID_hipCtxGetApiVersion:
- if (data->args.hipCtxGetApiVersion.apiVersion) data->args.hipCtxGetApiVersion.apiVersion__val = *(data->args.hipCtxGetApiVersion.apiVersion);
- break;
-// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyFromSymbolAsync:
- break;
-// hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')]
- case HIP_API_ID_hipExtGetLinkTypeAndHopCount:
- if (data->args.hipExtGetLinkTypeAndHopCount.linktype) data->args.hipExtGetLinkTypeAndHopCount.linktype__val = *(data->args.hipExtGetLinkTypeAndHopCount.linktype);
- if (data->args.hipExtGetLinkTypeAndHopCount.hopcount) data->args.hipExtGetLinkTypeAndHopCount.hopcount__val = *(data->args.hipExtGetLinkTypeAndHopCount.hopcount);
- break;
-// __hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')]
- case HIP_API_ID___hipPopCallConfiguration:
- if (data->args.__hipPopCallConfiguration.gridDim) data->args.__hipPopCallConfiguration.gridDim__val = *(data->args.__hipPopCallConfiguration.gridDim);
- if (data->args.__hipPopCallConfiguration.blockDim) data->args.__hipPopCallConfiguration.blockDim__val = *(data->args.__hipPopCallConfiguration.blockDim);
- if (data->args.__hipPopCallConfiguration.sharedMem) data->args.__hipPopCallConfiguration.sharedMem__val = *(data->args.__hipPopCallConfiguration.sharedMem);
- if (data->args.__hipPopCallConfiguration.stream) data->args.__hipPopCallConfiguration.stream__val = *(data->args.__hipPopCallConfiguration.stream);
- break;
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')]
- case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor:
- if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks);
- break;
-// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')]
- case HIP_API_ID_hipMemset3D:
- break;
-// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')]
- case HIP_API_ID_hipStreamCreateWithPriority:
- if (data->args.hipStreamCreateWithPriority.stream) data->args.hipStreamCreateWithPriority.stream__val = *(data->args.hipStreamCreateWithPriority.stream);
- break;
-// hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpy2DToArray:
- if (data->args.hipMemcpy2DToArray.dst) data->args.hipMemcpy2DToArray.dst__val = *(data->args.hipMemcpy2DToArray.dst);
- break;
-// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemsetD8Async:
- break;
-// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
- case HIP_API_ID_hipCtxGetCacheConfig:
- if (data->args.hipCtxGetCacheConfig.cacheConfig) data->args.hipCtxGetCacheConfig.cacheConfig__val = *(data->args.hipCtxGetCacheConfig.cacheConfig);
- break;
-// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')]
- case HIP_API_ID_hipModuleGetFunction:
- if (data->args.hipModuleGetFunction.function) data->args.hipModuleGetFunction.function__val = *(data->args.hipModuleGetFunction.function);
- if (data->args.hipModuleGetFunction.kname) data->args.hipModuleGetFunction.kname__val = *(data->args.hipModuleGetFunction.kname);
- break;
-// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipStreamWaitEvent:
- break;
-// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')]
- case HIP_API_ID_hipDeviceGetStreamPriorityRange:
- if (data->args.hipDeviceGetStreamPriorityRange.leastPriority) data->args.hipDeviceGetStreamPriorityRange.leastPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.leastPriority);
- if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority) data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.greatestPriority);
- break;
-// hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')]
- case HIP_API_ID_hipModuleLoad:
- if (data->args.hipModuleLoad.module) data->args.hipModuleLoad.module__val = *(data->args.hipModuleLoad.module);
- if (data->args.hipModuleLoad.fname) data->args.hipModuleLoad.fname__val = *(data->args.hipModuleLoad.fname);
- break;
-// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipDevicePrimaryCtxSetFlags:
- break;
-// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipLaunchCooperativeKernel:
- if (data->args.hipLaunchCooperativeKernel.kernelParams) data->args.hipLaunchCooperativeKernel.kernelParams__val = *(data->args.hipLaunchCooperativeKernel.kernelParams);
- break;
-// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice:
- if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList) data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val = *(data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList);
- break;
-// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyAsync:
- break;
-// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipMalloc3DArray:
- if (data->args.hipMalloc3DArray.array) data->args.hipMalloc3DArray.array__val = *(data->args.hipMalloc3DArray.array);
- if (data->args.hipMalloc3DArray.desc) data->args.hipMalloc3DArray.desc__val = *(data->args.hipMalloc3DArray.desc);
- break;
-// hipMallocHost[('void**', 'ptr'), ('size_t', 'size')]
- case HIP_API_ID_hipMallocHost:
- if (data->args.hipMallocHost.ptr) data->args.hipMallocHost.ptr__val = *(data->args.hipMallocHost.ptr);
- break;
-// hipCtxGetCurrent[('hipCtx_t*', 'ctx')]
- case HIP_API_ID_hipCtxGetCurrent:
- if (data->args.hipCtxGetCurrent.ctx) data->args.hipCtxGetCurrent.ctx__val = *(data->args.hipCtxGetCurrent.ctx);
- break;
-// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')]
- case HIP_API_ID_hipDevicePrimaryCtxGetState:
- if (data->args.hipDevicePrimaryCtxGetState.flags) data->args.hipDevicePrimaryCtxGetState.flags__val = *(data->args.hipDevicePrimaryCtxGetState.flags);
- if (data->args.hipDevicePrimaryCtxGetState.active) data->args.hipDevicePrimaryCtxGetState.active__val = *(data->args.hipDevicePrimaryCtxGetState.active);
- break;
-// hipEventQuery[('hipEvent_t', 'event')]
- case HIP_API_ID_hipEventQuery:
- break;
-// hipEventCreate[('hipEvent_t*', 'event')]
- case HIP_API_ID_hipEventCreate:
- if (data->args.hipEventCreate.event) data->args.hipEventCreate.event__val = *(data->args.hipEventCreate.event);
- break;
-// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')]
- case HIP_API_ID_hipMemGetAddressRange:
- if (data->args.hipMemGetAddressRange.pbase) data->args.hipMemGetAddressRange.pbase__val = *(data->args.hipMemGetAddressRange.pbase);
- if (data->args.hipMemGetAddressRange.psize) data->args.hipMemGetAddressRange.psize__val = *(data->args.hipMemGetAddressRange.psize);
- break;
-// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpyFromSymbol:
- break;
-// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')]
- case HIP_API_ID_hipArrayCreate:
- if (data->args.hipArrayCreate.pHandle) data->args.hipArrayCreate.pHandle__val = *(data->args.hipArrayCreate.pHandle);
- if (data->args.hipArrayCreate.pAllocateArray) data->args.hipArrayCreate.pAllocateArray__val = *(data->args.hipArrayCreate.pAllocateArray);
- break;
-// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('hipDeviceptr_t*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipStreamAttachMemAsync:
- if (data->args.hipStreamAttachMemAsync.dev_ptr) data->args.hipStreamAttachMemAsync.dev_ptr__val = *(data->args.hipStreamAttachMemAsync.dev_ptr);
- break;
-// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')]
- case HIP_API_ID_hipStreamGetFlags:
- if (data->args.hipStreamGetFlags.flags) data->args.hipStreamGetFlags.flags__val = *(data->args.hipStreamGetFlags.flags);
- break;
-// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipMallocArray:
- if (data->args.hipMallocArray.array) data->args.hipMallocArray.array__val = *(data->args.hipMallocArray.array);
- if (data->args.hipMallocArray.desc) data->args.hipMallocArray.desc__val = *(data->args.hipMallocArray.desc);
- break;
-// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
- case HIP_API_ID_hipCtxGetSharedMemConfig:
- if (data->args.hipCtxGetSharedMemConfig.pConfig) data->args.hipCtxGetSharedMemConfig.pConfig__val = *(data->args.hipCtxGetSharedMemConfig.pConfig);
- break;
-// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')]
- case HIP_API_ID_hipDeviceDisablePeerAccess:
- break;
-// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
- case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize:
- if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize);
- if (data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize);
- break;
-// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')]
- case HIP_API_ID_hipMemPtrGetInfo:
- if (data->args.hipMemPtrGetInfo.size) data->args.hipMemPtrGetInfo.size__val = *(data->args.hipMemPtrGetInfo.size);
- break;
-// hipFuncGetAttribute[('int*', 'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')]
- case HIP_API_ID_hipFuncGetAttribute:
- if (data->args.hipFuncGetAttribute.value) data->args.hipFuncGetAttribute.value__val = *(data->args.hipFuncGetAttribute.value);
- break;
-// hipCtxGetFlags[('unsigned int*', 'flags')]
- case HIP_API_ID_hipCtxGetFlags:
- if (data->args.hipCtxGetFlags.flags) data->args.hipCtxGetFlags.flags__val = *(data->args.hipCtxGetFlags.flags);
- break;
-// hipStreamDestroy[('hipStream_t', 'stream')]
- case HIP_API_ID_hipStreamDestroy:
- break;
-// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
- case HIP_API_ID___hipPushCallConfiguration:
- break;
-// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemset3DAsync:
- break;
-// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')]
- case HIP_API_ID_hipDeviceGetPCIBusId:
- data->args.hipDeviceGetPCIBusId.pciBusId = (data->args.hipDeviceGetPCIBusId.pciBusId) ? strdup(data->args.hipDeviceGetPCIBusId.pciBusId) : NULL;
- break;
-// hipInit[('unsigned int', 'flags')]
- case HIP_API_ID_hipInit:
- break;
-// hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')]
- case HIP_API_ID_hipMemcpyAtoH:
- if (data->args.hipMemcpyAtoH.srcArray) data->args.hipMemcpyAtoH.srcArray__val = *(data->args.hipMemcpyAtoH.srcArray);
- break;
-// hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')]
- case HIP_API_ID_hipStreamGetPriority:
- if (data->args.hipStreamGetPriority.priority) data->args.hipStreamGetPriority.priority__val = *(data->args.hipStreamGetPriority.priority);
- break;
-// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')]
- case HIP_API_ID_hipMemset2D:
- break;
-// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemset2DAsync:
- break;
-// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')]
- case HIP_API_ID_hipDeviceCanAccessPeer:
- if (data->args.hipDeviceCanAccessPeer.canAccessPeer) data->args.hipDeviceCanAccessPeer.canAccessPeer__val = *(data->args.hipDeviceCanAccessPeer.canAccessPeer);
- break;
-// hipLaunchByPtr[('const void*', 'hostFunction')]
- case HIP_API_ID_hipLaunchByPtr:
- break;
-// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemPrefetchAsync:
- break;
-// hipCtxDestroy[('hipCtx_t', 'ctx')]
- case HIP_API_ID_hipCtxDestroy:
- break;
-// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemsetD16Async:
- break;
-// hipModuleUnload[('hipModule_t', 'module')]
- case HIP_API_ID_hipModuleUnload:
- break;
-// hipHostUnregister[('void*', 'hostPtr')]
- case HIP_API_ID_hipHostUnregister:
- break;
-// hipProfilerStop[]
- case HIP_API_ID_hipProfilerStop:
- break;
-// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
- case HIP_API_ID_hipExtStreamCreateWithCUMask:
- if (data->args.hipExtStreamCreateWithCUMask.stream) data->args.hipExtStreamCreateWithCUMask.stream__val = *(data->args.hipExtStreamCreateWithCUMask.stream);
- if (data->args.hipExtStreamCreateWithCUMask.cuMask) data->args.hipExtStreamCreateWithCUMask.cuMask__val = *(data->args.hipExtStreamCreateWithCUMask.cuMask);
- break;
-// hipStreamSynchronize[('hipStream_t', 'stream')]
- case HIP_API_ID_hipStreamSynchronize:
- break;
-// hipFreeHost[('void*', 'ptr')]
- case HIP_API_ID_hipFreeHost:
- break;
-// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
- case HIP_API_ID_hipDeviceSetCacheConfig:
- break;
-// hipGetErrorName[]
- case HIP_API_ID_hipGetErrorName:
- break;
-// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes')]
- case HIP_API_ID_hipMemcpyHtoD:
- break;
-// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
- case HIP_API_ID_hipModuleGetGlobal:
- if (data->args.hipModuleGetGlobal.dptr) data->args.hipModuleGetGlobal.dptr__val = *(data->args.hipModuleGetGlobal.dptr);
- if (data->args.hipModuleGetGlobal.bytes) data->args.hipModuleGetGlobal.bytes__val = *(data->args.hipModuleGetGlobal.bytes);
- if (data->args.hipModuleGetGlobal.name) data->args.hipModuleGetGlobal.name__val = *(data->args.hipModuleGetGlobal.name);
- break;
-// hipMemcpyHtoA[('hipArray*', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')]
- case HIP_API_ID_hipMemcpyHtoA:
- if (data->args.hipMemcpyHtoA.dstArray) data->args.hipMemcpyHtoA.dstArray__val = *(data->args.hipMemcpyHtoA.dstArray);
- break;
-// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')]
- case HIP_API_ID_hipCtxCreate:
- if (data->args.hipCtxCreate.ctx) data->args.hipCtxCreate.ctx__val = *(data->args.hipCtxCreate.ctx);
- break;
-// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpy2D:
- break;
-// hipIpcCloseMemHandle[('void*', 'devPtr')]
- case HIP_API_ID_hipIpcCloseMemHandle:
- break;
-// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
- case HIP_API_ID_hipChooseDevice:
- if (data->args.hipChooseDevice.device) data->args.hipChooseDevice.device__val = *(data->args.hipChooseDevice.device);
- if (data->args.hipChooseDevice.prop) data->args.hipChooseDevice.prop__val = *(data->args.hipChooseDevice.prop);
- break;
-// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')]
- case HIP_API_ID_hipDeviceSetSharedMemConfig:
- break;
-// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipMallocMipmappedArray:
- if (data->args.hipMallocMipmappedArray.mipmappedArray) data->args.hipMallocMipmappedArray.mipmappedArray__val = *(data->args.hipMallocMipmappedArray.mipmappedArray);
- if (data->args.hipMallocMipmappedArray.desc) data->args.hipMallocMipmappedArray.desc__val = *(data->args.hipMallocMipmappedArray.desc);
- break;
-// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')]
- case HIP_API_ID_hipSetupArgument:
- break;
-// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')]
- case HIP_API_ID_hipIpcGetEventHandle:
- if (data->args.hipIpcGetEventHandle.handle) data->args.hipIpcGetEventHandle.handle__val = *(data->args.hipIpcGetEventHandle.handle);
- break;
-// hipFreeArray[('hipArray*', 'array')]
- case HIP_API_ID_hipFreeArray:
- if (data->args.hipFreeArray.array) data->args.hipFreeArray.array__val = *(data->args.hipFreeArray.array);
- break;
-// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
- case HIP_API_ID_hipCtxSetCacheConfig:
- break;
-// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')]
- case HIP_API_ID_hipFuncSetCacheConfig:
- break;
-// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipLaunchKernel:
- if (data->args.hipLaunchKernel.args) data->args.hipLaunchKernel.args__val = *(data->args.hipLaunchKernel.args);
- break;
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
- if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks);
- break;
-// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
- case HIP_API_ID_hipModuleGetTexRef:
- if (data->args.hipModuleGetTexRef.texRef) data->args.hipModuleGetTexRef.texRef__val = *(data->args.hipModuleGetTexRef.texRef);
- if (data->args.hipModuleGetTexRef.name) data->args.hipModuleGetTexRef.name__val = *(data->args.hipModuleGetTexRef.name);
- break;
-// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')]
- case HIP_API_ID_hipFuncSetAttribute:
- break;
-// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')]
- case HIP_API_ID_hipEventElapsedTime:
- if (data->args.hipEventElapsedTime.ms) data->args.hipEventElapsedTime.ms__val = *(data->args.hipEventElapsedTime.ms);
- break;
-// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipConfigureCall:
- break;
-// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')]
- case HIP_API_ID_hipMemAdvise:
- break;
-// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpy3DAsync:
- if (data->args.hipMemcpy3DAsync.p) data->args.hipMemcpy3DAsync.p__val = *(data->args.hipMemcpy3DAsync.p);
- break;
-// hipEventDestroy[('hipEvent_t', 'event')]
- case HIP_API_ID_hipEventDestroy:
- break;
-// hipCtxPopCurrent[('hipCtx_t*', 'ctx')]
- case HIP_API_ID_hipCtxPopCurrent:
- if (data->args.hipCtxPopCurrent.ctx) data->args.hipCtxPopCurrent.ctx__val = *(data->args.hipCtxPopCurrent.ctx);
- break;
-// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')]
- case HIP_API_ID_hipGetSymbolAddress:
- if (data->args.hipGetSymbolAddress.devPtr) data->args.hipGetSymbolAddress.devPtr__val = *(data->args.hipGetSymbolAddress.devPtr);
- break;
-// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 'hostPtr')]
- case HIP_API_ID_hipHostGetFlags:
- if (data->args.hipHostGetFlags.flagsPtr) data->args.hipHostGetFlags.flagsPtr__val = *(data->args.hipHostGetFlags.flagsPtr);
- break;
-// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipHostMalloc:
- if (data->args.hipHostMalloc.ptr) data->args.hipHostMalloc.ptr__val = *(data->args.hipHostMalloc.ptr);
- break;
-// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')]
- case HIP_API_ID_hipCtxSetSharedMemConfig:
- break;
-// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')]
- case HIP_API_ID_hipFreeMipmappedArray:
- break;
-// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')]
- case HIP_API_ID_hipMemGetInfo:
- if (data->args.hipMemGetInfo.free) data->args.hipMemGetInfo.free__val = *(data->args.hipMemGetInfo.free);
- if (data->args.hipMemGetInfo.total) data->args.hipMemGetInfo.total__val = *(data->args.hipMemGetInfo.total);
- break;
-// hipDeviceReset[]
- case HIP_API_ID_hipDeviceReset:
- break;
-// hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')]
- case HIP_API_ID_hipMemset:
- break;
-// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')]
- case HIP_API_ID_hipMemsetD8:
- break;
-// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyParam2DAsync:
- if (data->args.hipMemcpyParam2DAsync.pCopy) data->args.hipMemcpyParam2DAsync.pCopy__val = *(data->args.hipMemcpyParam2DAsync.pCopy);
- break;
-// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipHostRegister:
- break;
-// hipDriverGetVersion[('int*', 'driverVersion')]
- case HIP_API_ID_hipDriverGetVersion:
- if (data->args.hipDriverGetVersion.driverVersion) data->args.hipDriverGetVersion.driverVersion__val = *(data->args.hipDriverGetVersion.driverVersion);
- break;
-// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')]
- case HIP_API_ID_hipArray3DCreate:
- if (data->args.hipArray3DCreate.array) data->args.hipArray3DCreate.array__val = *(data->args.hipArray3DCreate.array);
- if (data->args.hipArray3DCreate.pAllocateArray) data->args.hipArray3DCreate.pAllocateArray__val = *(data->args.hipArray3DCreate.pAllocateArray);
- break;
-// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipIpcOpenMemHandle:
- if (data->args.hipIpcOpenMemHandle.devPtr) data->args.hipIpcOpenMemHandle.devPtr__val = *(data->args.hipIpcOpenMemHandle.devPtr);
- break;
-// hipGetLastError[]
- case HIP_API_ID_hipGetLastError:
- break;
-// hipGetDeviceFlags[('unsigned int*', 'flags')]
- case HIP_API_ID_hipGetDeviceFlags:
- if (data->args.hipGetDeviceFlags.flags) data->args.hipGetDeviceFlags.flags__val = *(data->args.hipGetDeviceFlags.flags);
- break;
-// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
- case HIP_API_ID_hipDeviceGetSharedMemConfig:
- if (data->args.hipDeviceGetSharedMemConfig.pConfig) data->args.hipDeviceGetSharedMemConfig.pConfig__val = *(data->args.hipDeviceGetSharedMemConfig.pConfig);
- break;
-// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')]
- case HIP_API_ID_hipDrvMemcpy3D:
- if (data->args.hipDrvMemcpy3D.pCopy) data->args.hipDrvMemcpy3D.pCopy__val = *(data->args.hipDrvMemcpy3D.pCopy);
- break;
-// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpy2DFromArray:
- break;
-// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
- if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks);
- break;
-// hipSetDeviceFlags[('unsigned int', 'flags')]
- case HIP_API_ID_hipSetDeviceFlags:
- break;
-// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent')]
- case HIP_API_ID_hipHccModuleLaunchKernel:
- if (data->args.hipHccModuleLaunchKernel.kernelParams) data->args.hipHccModuleLaunchKernel.kernelParams__val = *(data->args.hipHccModuleLaunchKernel.kernelParams);
- if (data->args.hipHccModuleLaunchKernel.extra) data->args.hipHccModuleLaunchKernel.extra__val = *(data->args.hipHccModuleLaunchKernel.extra);
- break;
-// hipFree[('void*', 'ptr')]
- case HIP_API_ID_hipFree:
- break;
-// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
- case HIP_API_ID_hipOccupancyMaxPotentialBlockSize:
- if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize) data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.gridSize);
- if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize) data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.blockSize);
- break;
-// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')]
- case HIP_API_ID_hipDeviceGetAttribute:
- if (data->args.hipDeviceGetAttribute.pi) data->args.hipDeviceGetAttribute.pi__val = *(data->args.hipDeviceGetAttribute.pi);
- break;
-// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')]
- case HIP_API_ID_hipDeviceComputeCapability:
- if (data->args.hipDeviceComputeCapability.major) data->args.hipDeviceComputeCapability.major__val = *(data->args.hipDeviceComputeCapability.major);
- if (data->args.hipDeviceComputeCapability.minor) data->args.hipDeviceComputeCapability.minor__val = *(data->args.hipDeviceComputeCapability.minor);
- break;
-// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')]
- case HIP_API_ID_hipCtxDisablePeerAccess:
- break;
-// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipMallocManaged:
- if (data->args.hipMallocManaged.dev_ptr) data->args.hipMallocManaged.dev_ptr__val = *(data->args.hipMallocManaged.dev_ptr);
- break;
-// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')]
- case HIP_API_ID_hipDeviceGetByPCIBusId:
- if (data->args.hipDeviceGetByPCIBusId.device) data->args.hipDeviceGetByPCIBusId.device__val = *(data->args.hipDeviceGetByPCIBusId.device);
- if (data->args.hipDeviceGetByPCIBusId.pciBusId) data->args.hipDeviceGetByPCIBusId.pciBusId__val = *(data->args.hipDeviceGetByPCIBusId.pciBusId);
- break;
-// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')]
- case HIP_API_ID_hipIpcGetMemHandle:
- if (data->args.hipIpcGetMemHandle.handle) data->args.hipIpcGetMemHandle.handle__val = *(data->args.hipIpcGetMemHandle.handle);
- break;
-// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyHtoDAsync:
- break;
-// hipCtxGetDevice[('hipDevice_t*', 'device')]
- case HIP_API_ID_hipCtxGetDevice:
- if (data->args.hipCtxGetDevice.device) data->args.hipCtxGetDevice.device__val = *(data->args.hipCtxGetDevice.device);
- break;
-// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
- case HIP_API_ID_hipMemcpyDtoD:
- break;
-// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')]
- case HIP_API_ID_hipModuleLoadData:
- if (data->args.hipModuleLoadData.module) data->args.hipModuleLoadData.module__val = *(data->args.hipModuleLoadData.module);
- break;
-// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')]
- case HIP_API_ID_hipDevicePrimaryCtxRelease:
- break;
-// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')]
- case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor:
- if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks);
- break;
-// hipCtxSetCurrent[('hipCtx_t', 'ctx')]
- case HIP_API_ID_hipCtxSetCurrent:
- break;
-// hipGetErrorString[]
- case HIP_API_ID_hipGetErrorString:
- break;
-// hipStreamCreate[('hipStream_t*', 'stream')]
- case HIP_API_ID_hipStreamCreate:
- if (data->args.hipStreamCreate.stream) data->args.hipStreamCreate.stream__val = *(data->args.hipStreamCreate.stream);
- break;
-// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')]
- case HIP_API_ID_hipDevicePrimaryCtxRetain:
- if (data->args.hipDevicePrimaryCtxRetain.pctx) data->args.hipDevicePrimaryCtxRetain.pctx__val = *(data->args.hipDevicePrimaryCtxRetain.pctx);
- break;
-// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')]
- case HIP_API_ID_hipDeviceGet:
- if (data->args.hipDeviceGet.device) data->args.hipDeviceGet.device__val = *(data->args.hipDeviceGet.device);
- break;
-// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipStreamCreateWithFlags:
- if (data->args.hipStreamCreateWithFlags.stream) data->args.hipStreamCreateWithFlags.stream__val = *(data->args.hipStreamCreateWithFlags.stream);
- break;
-// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpyFromArray:
- break;
-// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpy2DAsync:
- break;
-// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')]
- case HIP_API_ID_hipFuncGetAttributes:
- if (data->args.hipFuncGetAttributes.attr) data->args.hipFuncGetAttributes.attr__val = *(data->args.hipFuncGetAttributes.attr);
- break;
-// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')]
- case HIP_API_ID_hipGetSymbolSize:
- if (data->args.hipGetSymbolSize.size) data->args.hipGetSymbolSize.size__val = *(data->args.hipGetSymbolSize.size);
- break;
-// hipHostFree[('void*', 'ptr')]
- case HIP_API_ID_hipHostFree:
- break;
-// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipEventCreateWithFlags:
- if (data->args.hipEventCreateWithFlags.event) data->args.hipEventCreateWithFlags.event__val = *(data->args.hipEventCreateWithFlags.event);
- break;
-// hipStreamQuery[('hipStream_t', 'stream')]
- case HIP_API_ID_hipStreamQuery:
- break;
-// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')]
- case HIP_API_ID_hipMemcpy3D:
- if (data->args.hipMemcpy3D.p) data->args.hipMemcpy3D.p__val = *(data->args.hipMemcpy3D.p);
- break;
-// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpyToSymbol:
- break;
-// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpy:
- break;
-// hipPeekAtLastError[]
- case HIP_API_ID_hipPeekAtLastError:
- break;
-// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice:
- if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList) data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val = *(data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList);
- break;
-// hipHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipHostAlloc:
- if (data->args.hipHostAlloc.ptr) data->args.hipHostAlloc.ptr__val = *(data->args.hipHostAlloc.ptr);
- break;
-// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipStreamAddCallback:
- break;
-// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
- case HIP_API_ID_hipMemcpyToArray:
- if (data->args.hipMemcpyToArray.dst) data->args.hipMemcpyToArray.dst__val = *(data->args.hipMemcpyToArray.dst);
- break;
-// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')]
- case HIP_API_ID_hipMemsetD32:
- break;
-// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipExtModuleLaunchKernel:
- if (data->args.hipExtModuleLaunchKernel.kernelParams) data->args.hipExtModuleLaunchKernel.kernelParams__val = *(data->args.hipExtModuleLaunchKernel.kernelParams);
- if (data->args.hipExtModuleLaunchKernel.extra) data->args.hipExtModuleLaunchKernel.extra__val = *(data->args.hipExtModuleLaunchKernel.extra);
- break;
-// hipDeviceSynchronize[]
- case HIP_API_ID_hipDeviceSynchronize:
- break;
-// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
- case HIP_API_ID_hipDeviceGetCacheConfig:
- if (data->args.hipDeviceGetCacheConfig.cacheConfig) data->args.hipDeviceGetCacheConfig.cacheConfig__val = *(data->args.hipDeviceGetCacheConfig.cacheConfig);
- break;
-// hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')]
- case HIP_API_ID_hipMalloc3D:
- if (data->args.hipMalloc3D.pitchedDevPtr) data->args.hipMalloc3D.pitchedDevPtr__val = *(data->args.hipMalloc3D.pitchedDevPtr);
- break;
-// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')]
- case HIP_API_ID_hipPointerGetAttributes:
- if (data->args.hipPointerGetAttributes.attributes) data->args.hipPointerGetAttributes.attributes__val = *(data->args.hipPointerGetAttributes.attributes);
- break;
-// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemsetAsync:
- break;
-// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')]
- case HIP_API_ID_hipDeviceGetName:
- data->args.hipDeviceGetName.name = (data->args.hipDeviceGetName.name) ? strdup(data->args.hipDeviceGetName.name) : NULL;
- break;
-// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags:
- if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize);
- if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize);
- break;
-// hipCtxPushCurrent[('hipCtx_t', 'ctx')]
- case HIP_API_ID_hipCtxPushCurrent:
- break;
-// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')]
- case HIP_API_ID_hipMemcpyPeer:
- break;
-// hipEventSynchronize[('hipEvent_t', 'event')]
- case HIP_API_ID_hipEventSynchronize:
- break;
-// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyDtoDAsync:
- break;
-// hipProfilerStart[]
- case HIP_API_ID_hipProfilerStart:
- break;
-// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipExtMallocWithFlags:
- if (data->args.hipExtMallocWithFlags.ptr) data->args.hipExtMallocWithFlags.ptr__val = *(data->args.hipExtMallocWithFlags.ptr);
- break;
-// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipCtxEnablePeerAccess:
- break;
-// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')]
- case HIP_API_ID_hipMemAllocHost:
- if (data->args.hipMemAllocHost.ptr) data->args.hipMemAllocHost.ptr__val = *(data->args.hipMemAllocHost.ptr);
- break;
-// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyDtoHAsync:
- break;
-// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')]
- case HIP_API_ID_hipModuleLaunchKernel:
- if (data->args.hipModuleLaunchKernel.kernelParams) data->args.hipModuleLaunchKernel.kernelParams__val = *(data->args.hipModuleLaunchKernel.kernelParams);
- if (data->args.hipModuleLaunchKernel.extra) data->args.hipModuleLaunchKernel.extra__val = *(data->args.hipModuleLaunchKernel.extra);
- break;
-// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')]
- case HIP_API_ID_hipMemAllocPitch:
- if (data->args.hipMemAllocPitch.dptr) data->args.hipMemAllocPitch.dptr__val = *(data->args.hipMemAllocPitch.dptr);
- if (data->args.hipMemAllocPitch.pitch) data->args.hipMemAllocPitch.pitch__val = *(data->args.hipMemAllocPitch.pitch);
- break;
-// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')]
- case HIP_API_ID_hipExtLaunchKernel:
- if (data->args.hipExtLaunchKernel.args) data->args.hipExtLaunchKernel.args__val = *(data->args.hipExtLaunchKernel.args);
- break;
-// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpy2DFromArrayAsync:
- break;
-// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')]
- case HIP_API_ID_hipDeviceGetLimit:
- if (data->args.hipDeviceGetLimit.pValue) data->args.hipDeviceGetLimit.pValue__val = *(data->args.hipDeviceGetLimit.pValue);
- break;
-// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')]
- case HIP_API_ID_hipModuleLoadDataEx:
- if (data->args.hipModuleLoadDataEx.module) data->args.hipModuleLoadDataEx.module__val = *(data->args.hipModuleLoadDataEx.module);
- if (data->args.hipModuleLoadDataEx.options) data->args.hipModuleLoadDataEx.options__val = *(data->args.hipModuleLoadDataEx.options);
- if (data->args.hipModuleLoadDataEx.optionsValues) data->args.hipModuleLoadDataEx.optionsValues__val = *(data->args.hipModuleLoadDataEx.optionsValues);
- break;
-// hipRuntimeGetVersion[('int*', 'runtimeVersion')]
- case HIP_API_ID_hipRuntimeGetVersion:
- if (data->args.hipRuntimeGetVersion.runtimeVersion) data->args.hipRuntimeGetVersion.runtimeVersion__val = *(data->args.hipRuntimeGetVersion.runtimeVersion);
- break;
-// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
- case HIP_API_ID_hipMemRangeGetAttribute:
- break;
-// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')]
- case HIP_API_ID_hipDeviceGetP2PAttribute:
- if (data->args.hipDeviceGetP2PAttribute.value) data->args.hipDeviceGetP2PAttribute.value__val = *(data->args.hipDeviceGetP2PAttribute.value);
- break;
-// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyPeerAsync:
- break;
-// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
- case HIP_API_ID_hipGetDeviceProperties:
- if (data->args.hipGetDeviceProperties.props) data->args.hipGetDeviceProperties.props__val = *(data->args.hipGetDeviceProperties.props);
- break;
-// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
- case HIP_API_ID_hipMemcpyDtoH:
- break;
-// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemcpyWithStream:
- break;
-// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')]
- case HIP_API_ID_hipDeviceTotalMem:
- if (data->args.hipDeviceTotalMem.bytes) data->args.hipDeviceTotalMem.bytes__val = *(data->args.hipDeviceTotalMem.bytes);
- break;
-// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipHostGetDevicePointer:
- if (data->args.hipHostGetDevicePointer.devPtr) data->args.hipHostGetDevicePointer.devPtr__val = *(data->args.hipHostGetDevicePointer.devPtr);
- break;
-// hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
- case HIP_API_ID_hipMemRangeGetAttributes:
- if (data->args.hipMemRangeGetAttributes.data) data->args.hipMemRangeGetAttributes.data__val = *(data->args.hipMemRangeGetAttributes.data);
- if (data->args.hipMemRangeGetAttributes.data_sizes) data->args.hipMemRangeGetAttributes.data_sizes__val = *(data->args.hipMemRangeGetAttributes.data_sizes);
- if (data->args.hipMemRangeGetAttributes.attributes) data->args.hipMemRangeGetAttributes.attributes__val = *(data->args.hipMemRangeGetAttributes.attributes);
- break;
-// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')]
- case HIP_API_ID_hipMemcpyParam2D:
- if (data->args.hipMemcpyParam2D.pCopy) data->args.hipMemcpyParam2D.pCopy__val = *(data->args.hipMemcpyParam2D.pCopy);
- break;
-// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')]
- case HIP_API_ID_hipDevicePrimaryCtxReset:
- break;
-// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')]
- case HIP_API_ID_hipGetMipmappedArrayLevel:
- if (data->args.hipGetMipmappedArrayLevel.levelArray) data->args.hipGetMipmappedArrayLevel.levelArray__val = *(data->args.hipGetMipmappedArrayLevel.levelArray);
- break;
-// hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
- case HIP_API_ID_hipMemsetD32Async:
- break;
-// hipGetDevice[('int*', 'deviceId')]
- case HIP_API_ID_hipGetDevice:
- if (data->args.hipGetDevice.deviceId) data->args.hipGetDevice.deviceId__val = *(data->args.hipGetDevice.deviceId);
- break;
-// hipGetDeviceCount[('int*', 'count')]
- case HIP_API_ID_hipGetDeviceCount:
- if (data->args.hipGetDeviceCount.count) data->args.hipGetDeviceCount.count__val = *(data->args.hipGetDeviceCount.count);
- break;
-// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')]
- case HIP_API_ID_hipIpcOpenEventHandle:
- if (data->args.hipIpcOpenEventHandle.event) data->args.hipIpcOpenEventHandle.event__val = *(data->args.hipIpcOpenEventHandle.event);
- break;
- default: break;
- };
-}
-
-#include <sstream>
-#include <string>
-// HIP API string method, method name and parameters
-static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {
- std::ostringstream oss;
- switch (id) {
- case HIP_API_ID_hipDrvMemcpy3DAsync:
- oss << "hipDrvMemcpy3DAsync(";
- if (data->args.hipDrvMemcpy3DAsync.pCopy == NULL) oss << "pCopy=NULL";
- else oss << "pCopy=" << data->args.hipDrvMemcpy3DAsync.pCopy__val;
- oss << ", stream=" << data->args.hipDrvMemcpy3DAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceEnablePeerAccess:
- oss << "hipDeviceEnablePeerAccess(";
- oss << "peerDeviceId=" << data->args.hipDeviceEnablePeerAccess.peerDeviceId;
- oss << ", flags=" << data->args.hipDeviceEnablePeerAccess.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipFuncSetSharedMemConfig:
- oss << "hipFuncSetSharedMemConfig(";
- oss << "func=" << data->args.hipFuncSetSharedMemConfig.func;
- oss << ", config=" << data->args.hipFuncSetSharedMemConfig.config;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyToSymbolAsync:
- oss << "hipMemcpyToSymbolAsync(";
- oss << "symbol=" << data->args.hipMemcpyToSymbolAsync.symbol;
- oss << ", src=" << data->args.hipMemcpyToSymbolAsync.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyToSymbolAsync.sizeBytes;
- oss << ", offset=" << data->args.hipMemcpyToSymbolAsync.offset;
- oss << ", kind=" << data->args.hipMemcpyToSymbolAsync.kind;
- oss << ", stream=" << data->args.hipMemcpyToSymbolAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipMallocPitch:
- oss << "hipMallocPitch(";
- if (data->args.hipMallocPitch.ptr == NULL) oss << "ptr=NULL";
- else oss << "ptr=" << data->args.hipMallocPitch.ptr__val;
- if (data->args.hipMallocPitch.pitch == NULL) oss << ", pitch=NULL";
- else oss << ", pitch=" << data->args.hipMallocPitch.pitch__val;
- oss << ", width=" << data->args.hipMallocPitch.width;
- oss << ", height=" << data->args.hipMallocPitch.height;
- oss << ")";
- break;
- case HIP_API_ID_hipMalloc:
- oss << "hipMalloc(";
- if (data->args.hipMalloc.ptr == NULL) oss << "ptr=NULL";
- else oss << "ptr=" << data->args.hipMalloc.ptr__val;
- oss << ", size=" << data->args.hipMalloc.size;
- oss << ")";
- break;
- case HIP_API_ID_hipMemsetD16:
- oss << "hipMemsetD16(";
- oss << "dest=" << data->args.hipMemsetD16.dest;
- oss << ", value=" << data->args.hipMemsetD16.value;
- oss << ", count=" << data->args.hipMemsetD16.count;
- oss << ")";
- break;
- case HIP_API_ID_hipExtStreamGetCUMask:
- oss << "hipExtStreamGetCUMask(";
- oss << "stream=" << data->args.hipExtStreamGetCUMask.stream;
- oss << ", cuMaskSize=" << data->args.hipExtStreamGetCUMask.cuMaskSize;
- if (data->args.hipExtStreamGetCUMask.cuMask == NULL) oss << ", cuMask=NULL";
- else oss << ", cuMask=" << data->args.hipExtStreamGetCUMask.cuMask__val;
- oss << ")";
- break;
- case HIP_API_ID_hipEventRecord:
- oss << "hipEventRecord(";
- oss << "event=" << data->args.hipEventRecord.event;
- oss << ", stream=" << data->args.hipEventRecord.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxSynchronize:
- oss << "hipCtxSynchronize(";
- oss << ")";
- break;
- case HIP_API_ID_hipSetDevice:
- oss << "hipSetDevice(";
- oss << "deviceId=" << data->args.hipSetDevice.deviceId;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxGetApiVersion:
- oss << "hipCtxGetApiVersion(";
- oss << "ctx=" << data->args.hipCtxGetApiVersion.ctx;
- if (data->args.hipCtxGetApiVersion.apiVersion == NULL) oss << ", apiVersion=NULL";
- else oss << ", apiVersion=" << data->args.hipCtxGetApiVersion.apiVersion__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyFromSymbolAsync:
- oss << "hipMemcpyFromSymbolAsync(";
- oss << "dst=" << data->args.hipMemcpyFromSymbolAsync.dst;
- oss << ", symbol=" << data->args.hipMemcpyFromSymbolAsync.symbol;
- oss << ", sizeBytes=" << data->args.hipMemcpyFromSymbolAsync.sizeBytes;
- oss << ", offset=" << data->args.hipMemcpyFromSymbolAsync.offset;
- oss << ", kind=" << data->args.hipMemcpyFromSymbolAsync.kind;
- oss << ", stream=" << data->args.hipMemcpyFromSymbolAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipExtGetLinkTypeAndHopCount:
- oss << "hipExtGetLinkTypeAndHopCount(";
- oss << "device1=" << data->args.hipExtGetLinkTypeAndHopCount.device1;
- oss << ", device2=" << data->args.hipExtGetLinkTypeAndHopCount.device2;
- if (data->args.hipExtGetLinkTypeAndHopCount.linktype == NULL) oss << ", linktype=NULL";
- else oss << ", linktype=" << data->args.hipExtGetLinkTypeAndHopCount.linktype__val;
- if (data->args.hipExtGetLinkTypeAndHopCount.hopcount == NULL) oss << ", hopcount=NULL";
- else oss << ", hopcount=" << data->args.hipExtGetLinkTypeAndHopCount.hopcount__val;
- oss << ")";
- break;
- case HIP_API_ID___hipPopCallConfiguration:
- oss << "__hipPopCallConfiguration(";
- if (data->args.__hipPopCallConfiguration.gridDim == NULL) oss << "gridDim=NULL";
- else oss << "gridDim=" << data->args.__hipPopCallConfiguration.gridDim__val;
- if (data->args.__hipPopCallConfiguration.blockDim == NULL) oss << ", blockDim=NULL";
- else oss << ", blockDim=" << data->args.__hipPopCallConfiguration.blockDim__val;
- if (data->args.__hipPopCallConfiguration.sharedMem == NULL) oss << ", sharedMem=NULL";
- else oss << ", sharedMem=" << data->args.__hipPopCallConfiguration.sharedMem__val;
- if (data->args.__hipPopCallConfiguration.stream == NULL) oss << ", stream=NULL";
- else oss << ", stream=" << data->args.__hipPopCallConfiguration.stream__val;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor:
- oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(";
- if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL";
- else oss << "numBlocks=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val;
- oss << ", f=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f;
- oss << ", blockSize=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize;
- oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk;
- oss << ")";
- break;
- case HIP_API_ID_hipMemset3D:
- oss << "hipMemset3D(";
- oss << "pitchedDevPtr=" << data->args.hipMemset3D.pitchedDevPtr;
- oss << ", value=" << data->args.hipMemset3D.value;
- oss << ", extent=" << data->args.hipMemset3D.extent;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamCreateWithPriority:
- oss << "hipStreamCreateWithPriority(";
- if (data->args.hipStreamCreateWithPriority.stream == NULL) oss << "stream=NULL";
- else oss << "stream=" << data->args.hipStreamCreateWithPriority.stream__val;
- oss << ", flags=" << data->args.hipStreamCreateWithPriority.flags;
- oss << ", priority=" << data->args.hipStreamCreateWithPriority.priority;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy2DToArray:
- oss << "hipMemcpy2DToArray(";
- if (data->args.hipMemcpy2DToArray.dst == NULL) oss << "dst=NULL";
- else oss << "dst=" << data->args.hipMemcpy2DToArray.dst__val;
- oss << ", wOffset=" << data->args.hipMemcpy2DToArray.wOffset;
- oss << ", hOffset=" << data->args.hipMemcpy2DToArray.hOffset;
- oss << ", src=" << data->args.hipMemcpy2DToArray.src;
- oss << ", spitch=" << data->args.hipMemcpy2DToArray.spitch;
- oss << ", width=" << data->args.hipMemcpy2DToArray.width;
- oss << ", height=" << data->args.hipMemcpy2DToArray.height;
- oss << ", kind=" << data->args.hipMemcpy2DToArray.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipMemsetD8Async:
- oss << "hipMemsetD8Async(";
- oss << "dest=" << data->args.hipMemsetD8Async.dest;
- oss << ", value=" << data->args.hipMemsetD8Async.value;
- oss << ", count=" << data->args.hipMemsetD8Async.count;
- oss << ", stream=" << data->args.hipMemsetD8Async.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxGetCacheConfig:
- oss << "hipCtxGetCacheConfig(";
- if (data->args.hipCtxGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL";
- else oss << "cacheConfig=" << data->args.hipCtxGetCacheConfig.cacheConfig__val;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleGetFunction:
- oss << "hipModuleGetFunction(";
- if (data->args.hipModuleGetFunction.function == NULL) oss << "function=NULL";
- else oss << "function=" << data->args.hipModuleGetFunction.function__val;
- oss << ", module=" << data->args.hipModuleGetFunction.module;
- if (data->args.hipModuleGetFunction.kname == NULL) oss << ", kname=NULL";
- else oss << ", kname=" << data->args.hipModuleGetFunction.kname__val;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamWaitEvent:
- oss << "hipStreamWaitEvent(";
- oss << "stream=" << data->args.hipStreamWaitEvent.stream;
- oss << ", event=" << data->args.hipStreamWaitEvent.event;
- oss << ", flags=" << data->args.hipStreamWaitEvent.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetStreamPriorityRange:
- oss << "hipDeviceGetStreamPriorityRange(";
- if (data->args.hipDeviceGetStreamPriorityRange.leastPriority == NULL) oss << "leastPriority=NULL";
- else oss << "leastPriority=" << data->args.hipDeviceGetStreamPriorityRange.leastPriority__val;
- if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority == NULL) oss << ", greatestPriority=NULL";
- else oss << ", greatestPriority=" << data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleLoad:
- oss << "hipModuleLoad(";
- if (data->args.hipModuleLoad.module == NULL) oss << "module=NULL";
- else oss << "module=" << data->args.hipModuleLoad.module__val;
- if (data->args.hipModuleLoad.fname == NULL) oss << ", fname=NULL";
- else oss << ", fname=" << data->args.hipModuleLoad.fname__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDevicePrimaryCtxSetFlags:
- oss << "hipDevicePrimaryCtxSetFlags(";
- oss << "dev=" << data->args.hipDevicePrimaryCtxSetFlags.dev;
- oss << ", flags=" << data->args.hipDevicePrimaryCtxSetFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipLaunchCooperativeKernel:
- oss << "hipLaunchCooperativeKernel(";
- oss << "f=" << data->args.hipLaunchCooperativeKernel.f;
- oss << ", gridDim=" << data->args.hipLaunchCooperativeKernel.gridDim;
- oss << ", blockDimX=" << data->args.hipLaunchCooperativeKernel.blockDimX;
- if (data->args.hipLaunchCooperativeKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
- else oss << ", kernelParams=" << data->args.hipLaunchCooperativeKernel.kernelParams__val;
- oss << ", sharedMemBytes=" << data->args.hipLaunchCooperativeKernel.sharedMemBytes;
- oss << ", stream=" << data->args.hipLaunchCooperativeKernel.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice:
- oss << "hipLaunchCooperativeKernelMultiDevice(";
- if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL";
- else oss << "launchParamsList=" << data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val;
- oss << ", numDevices=" << data->args.hipLaunchCooperativeKernelMultiDevice.numDevices;
- oss << ", flags=" << data->args.hipLaunchCooperativeKernelMultiDevice.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyAsync:
- oss << "hipMemcpyAsync(";
- oss << "dst=" << data->args.hipMemcpyAsync.dst;
- oss << ", src=" << data->args.hipMemcpyAsync.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyAsync.sizeBytes;
- oss << ", kind=" << data->args.hipMemcpyAsync.kind;
- oss << ", stream=" << data->args.hipMemcpyAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipMalloc3DArray:
- oss << "hipMalloc3DArray(";
- if (data->args.hipMalloc3DArray.array == NULL) oss << "array=NULL";
- else oss << "array=" << data->args.hipMalloc3DArray.array__val;
- if (data->args.hipMalloc3DArray.desc == NULL) oss << ", desc=NULL";
- else oss << ", desc=" << data->args.hipMalloc3DArray.desc__val;
- oss << ", extent=" << data->args.hipMalloc3DArray.extent;
- oss << ", flags=" << data->args.hipMalloc3DArray.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMallocHost:
- oss << "hipMallocHost(";
- if (data->args.hipMallocHost.ptr == NULL) oss << "ptr=NULL";
- else oss << "ptr=" << data->args.hipMallocHost.ptr__val;
- oss << ", size=" << data->args.hipMallocHost.size;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxGetCurrent:
- oss << "hipCtxGetCurrent(";
- if (data->args.hipCtxGetCurrent.ctx == NULL) oss << "ctx=NULL";
- else oss << "ctx=" << data->args.hipCtxGetCurrent.ctx__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDevicePrimaryCtxGetState:
- oss << "hipDevicePrimaryCtxGetState(";
- oss << "dev=" << data->args.hipDevicePrimaryCtxGetState.dev;
- if (data->args.hipDevicePrimaryCtxGetState.flags == NULL) oss << ", flags=NULL";
- else oss << ", flags=" << data->args.hipDevicePrimaryCtxGetState.flags__val;
- if (data->args.hipDevicePrimaryCtxGetState.active == NULL) oss << ", active=NULL";
- else oss << ", active=" << data->args.hipDevicePrimaryCtxGetState.active__val;
- oss << ")";
- break;
- case HIP_API_ID_hipEventQuery:
- oss << "hipEventQuery(";
- oss << "event=" << data->args.hipEventQuery.event;
- oss << ")";
- break;
- case HIP_API_ID_hipEventCreate:
- oss << "hipEventCreate(";
- if (data->args.hipEventCreate.event == NULL) oss << "event=NULL";
- else oss << "event=" << data->args.hipEventCreate.event__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemGetAddressRange:
- oss << "hipMemGetAddressRange(";
- if (data->args.hipMemGetAddressRange.pbase == NULL) oss << "pbase=NULL";
- else oss << "pbase=" << data->args.hipMemGetAddressRange.pbase__val;
- if (data->args.hipMemGetAddressRange.psize == NULL) oss << ", psize=NULL";
- else oss << ", psize=" << data->args.hipMemGetAddressRange.psize__val;
- oss << ", dptr=" << data->args.hipMemGetAddressRange.dptr;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyFromSymbol:
- oss << "hipMemcpyFromSymbol(";
- oss << "dst=" << data->args.hipMemcpyFromSymbol.dst;
- oss << ", symbol=" << data->args.hipMemcpyFromSymbol.symbol;
- oss << ", sizeBytes=" << data->args.hipMemcpyFromSymbol.sizeBytes;
- oss << ", offset=" << data->args.hipMemcpyFromSymbol.offset;
- oss << ", kind=" << data->args.hipMemcpyFromSymbol.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipArrayCreate:
- oss << "hipArrayCreate(";
- if (data->args.hipArrayCreate.pHandle == NULL) oss << "pHandle=NULL";
- else oss << "pHandle=" << (void*)data->args.hipArrayCreate.pHandle__val;
- if (data->args.hipArrayCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL";
- else oss << ", pAllocateArray=" << data->args.hipArrayCreate.pAllocateArray__val;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamAttachMemAsync:
- oss << "hipStreamAttachMemAsync(";
- oss << "stream=" << data->args.hipStreamAttachMemAsync.stream;
- if (data->args.hipStreamAttachMemAsync.dev_ptr == NULL) oss << ", dev_ptr=NULL";
- else oss << ", dev_ptr=" << data->args.hipStreamAttachMemAsync.dev_ptr__val;
- oss << ", length=" << data->args.hipStreamAttachMemAsync.length;
- oss << ", flags=" << data->args.hipStreamAttachMemAsync.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamGetFlags:
- oss << "hipStreamGetFlags(";
- oss << "stream=" << data->args.hipStreamGetFlags.stream;
- if (data->args.hipStreamGetFlags.flags == NULL) oss << ", flags=NULL";
- else oss << ", flags=" << data->args.hipStreamGetFlags.flags__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMallocArray:
- oss << "hipMallocArray(";
- if (data->args.hipMallocArray.array == NULL) oss << "array=NULL";
- else oss << "array=" << (void*)data->args.hipMallocArray.array__val;
- if (data->args.hipMallocArray.desc == NULL) oss << ", desc=NULL";
- else oss << ", desc=" << data->args.hipMallocArray.desc__val;
- oss << ", width=" << data->args.hipMallocArray.width;
- oss << ", height=" << data->args.hipMallocArray.height;
- oss << ", flags=" << data->args.hipMallocArray.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxGetSharedMemConfig:
- oss << "hipCtxGetSharedMemConfig(";
- if (data->args.hipCtxGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL";
- else oss << "pConfig=" << data->args.hipCtxGetSharedMemConfig.pConfig__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceDisablePeerAccess:
- oss << "hipDeviceDisablePeerAccess(";
- oss << "peerDeviceId=" << data->args.hipDeviceDisablePeerAccess.peerDeviceId;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize:
- oss << "hipModuleOccupancyMaxPotentialBlockSize(";
- if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL";
- else oss << "gridSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val;
- if (data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL";
- else oss << ", blockSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val;
- oss << ", f=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.f;
- oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk;
- oss << ", blockSizeLimit=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit;
- oss << ")";
- break;
- case HIP_API_ID_hipMemPtrGetInfo:
- oss << "hipMemPtrGetInfo(";
- oss << "ptr=" << data->args.hipMemPtrGetInfo.ptr;
- if (data->args.hipMemPtrGetInfo.size == NULL) oss << ", size=NULL";
- else oss << ", size=" << data->args.hipMemPtrGetInfo.size__val;
- oss << ")";
- break;
- case HIP_API_ID_hipFuncGetAttribute:
- oss << "hipFuncGetAttribute(";
- if (data->args.hipFuncGetAttribute.value == NULL) oss << "value=NULL";
- else oss << "value=" << data->args.hipFuncGetAttribute.value__val;
- oss << ", attrib=" << data->args.hipFuncGetAttribute.attrib;
- oss << ", hfunc=" << data->args.hipFuncGetAttribute.hfunc;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxGetFlags:
- oss << "hipCtxGetFlags(";
- if (data->args.hipCtxGetFlags.flags == NULL) oss << "flags=NULL";
- else oss << "flags=" << data->args.hipCtxGetFlags.flags__val;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamDestroy:
- oss << "hipStreamDestroy(";
- oss << "stream=" << data->args.hipStreamDestroy.stream;
- oss << ")";
- break;
- case HIP_API_ID___hipPushCallConfiguration:
- oss << "__hipPushCallConfiguration(";
- oss << "gridDim=" << data->args.__hipPushCallConfiguration.gridDim;
- oss << ", blockDim=" << data->args.__hipPushCallConfiguration.blockDim;
- oss << ", sharedMem=" << data->args.__hipPushCallConfiguration.sharedMem;
- oss << ", stream=" << data->args.__hipPushCallConfiguration.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipMemset3DAsync:
- oss << "hipMemset3DAsync(";
- oss << "pitchedDevPtr=" << data->args.hipMemset3DAsync.pitchedDevPtr;
- oss << ", value=" << data->args.hipMemset3DAsync.value;
- oss << ", extent=" << data->args.hipMemset3DAsync.extent;
- oss << ", stream=" << data->args.hipMemset3DAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetPCIBusId:
- oss << "hipDeviceGetPCIBusId(";
- if (data->args.hipDeviceGetPCIBusId.pciBusId == NULL) oss << "pciBusId=NULL";
- else oss << "pciBusId=" << data->args.hipDeviceGetPCIBusId.pciBusId__val;
- oss << ", len=" << data->args.hipDeviceGetPCIBusId.len;
- oss << ", device=" << data->args.hipDeviceGetPCIBusId.device;
- oss << ")";
- break;
- case HIP_API_ID_hipInit:
- oss << "hipInit(";
- oss << "flags=" << data->args.hipInit.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyAtoH:
- oss << "hipMemcpyAtoH(";
- oss << "dst=" << data->args.hipMemcpyAtoH.dst;
- if (data->args.hipMemcpyAtoH.srcArray == NULL) oss << ", srcArray=NULL";
- else oss << ", srcArray=" << data->args.hipMemcpyAtoH.srcArray__val;
- oss << ", srcOffset=" << data->args.hipMemcpyAtoH.srcOffset;
- oss << ", count=" << data->args.hipMemcpyAtoH.count;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamGetPriority:
- oss << "hipStreamGetPriority(";
- oss << "stream=" << data->args.hipStreamGetPriority.stream;
- if (data->args.hipStreamGetPriority.priority == NULL) oss << ", priority=NULL";
- else oss << ", priority=" << data->args.hipStreamGetPriority.priority__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemset2D:
- oss << "hipMemset2D(";
- oss << "dst=" << data->args.hipMemset2D.dst;
- oss << ", pitch=" << data->args.hipMemset2D.pitch;
- oss << ", value=" << data->args.hipMemset2D.value;
- oss << ", width=" << data->args.hipMemset2D.width;
- oss << ", height=" << data->args.hipMemset2D.height;
- oss << ")";
- break;
- case HIP_API_ID_hipMemset2DAsync:
- oss << "hipMemset2DAsync(";
- oss << "dst=" << data->args.hipMemset2DAsync.dst;
- oss << ", pitch=" << data->args.hipMemset2DAsync.pitch;
- oss << ", value=" << data->args.hipMemset2DAsync.value;
- oss << ", width=" << data->args.hipMemset2DAsync.width;
- oss << ", height=" << data->args.hipMemset2DAsync.height;
- oss << ", stream=" << data->args.hipMemset2DAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceCanAccessPeer:
- oss << "hipDeviceCanAccessPeer(";
- if (data->args.hipDeviceCanAccessPeer.canAccessPeer == NULL) oss << "canAccessPeer=NULL";
- else oss << "canAccessPeer=" << data->args.hipDeviceCanAccessPeer.canAccessPeer__val;
- oss << ", deviceId=" << data->args.hipDeviceCanAccessPeer.deviceId;
- oss << ", peerDeviceId=" << data->args.hipDeviceCanAccessPeer.peerDeviceId;
- oss << ")";
- break;
- case HIP_API_ID_hipLaunchByPtr:
- oss << "hipLaunchByPtr(";
- oss << "hostFunction=" << data->args.hipLaunchByPtr.hostFunction;
- oss << ")";
- break;
- case HIP_API_ID_hipMemPrefetchAsync:
- oss << "hipMemPrefetchAsync(";
- oss << "dev_ptr=" << data->args.hipMemPrefetchAsync.dev_ptr;
- oss << ", count=" << data->args.hipMemPrefetchAsync.count;
- oss << ", device=" << data->args.hipMemPrefetchAsync.device;
- oss << ", stream=" << data->args.hipMemPrefetchAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxDestroy:
- oss << "hipCtxDestroy(";
- oss << "ctx=" << data->args.hipCtxDestroy.ctx;
- oss << ")";
- break;
- case HIP_API_ID_hipMemsetD16Async:
- oss << "hipMemsetD16Async(";
- oss << "dest=" << data->args.hipMemsetD16Async.dest;
- oss << ", value=" << data->args.hipMemsetD16Async.value;
- oss << ", count=" << data->args.hipMemsetD16Async.count;
- oss << ", stream=" << data->args.hipMemsetD16Async.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleUnload:
- oss << "hipModuleUnload(";
- oss << "module=" << data->args.hipModuleUnload.module;
- oss << ")";
- break;
- case HIP_API_ID_hipHostUnregister:
- oss << "hipHostUnregister(";
- oss << "hostPtr=" << data->args.hipHostUnregister.hostPtr;
- oss << ")";
- break;
- case HIP_API_ID_hipProfilerStop:
- oss << "hipProfilerStop(";
- oss << ")";
- break;
- case HIP_API_ID_hipExtStreamCreateWithCUMask:
- oss << "hipExtStreamCreateWithCUMask(";
- if (data->args.hipExtStreamCreateWithCUMask.stream == NULL) oss << "stream=NULL";
- else oss << "stream=" << data->args.hipExtStreamCreateWithCUMask.stream__val;
- oss << ", cuMaskSize=" << data->args.hipExtStreamCreateWithCUMask.cuMaskSize;
- if (data->args.hipExtStreamCreateWithCUMask.cuMask == NULL) oss << ", cuMask=NULL";
- else oss << ", cuMask=" << data->args.hipExtStreamCreateWithCUMask.cuMask__val;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamSynchronize:
- oss << "hipStreamSynchronize(";
- oss << "stream=" << data->args.hipStreamSynchronize.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipFreeHost:
- oss << "hipFreeHost(";
- oss << "ptr=" << data->args.hipFreeHost.ptr;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceSetCacheConfig:
- oss << "hipDeviceSetCacheConfig(";
- oss << "cacheConfig=" << data->args.hipDeviceSetCacheConfig.cacheConfig;
- oss << ")";
- break;
- case HIP_API_ID_hipGetErrorName:
- oss << "hipGetErrorName(";
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyHtoD:
- oss << "hipMemcpyHtoD(";
- oss << "dst=" << data->args.hipMemcpyHtoD.dst;
- oss << ", src=" << data->args.hipMemcpyHtoD.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyHtoD.sizeBytes;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleGetGlobal:
- oss << "hipModuleGetGlobal(";
- if (data->args.hipModuleGetGlobal.dptr == NULL) oss << "dptr=NULL";
- else oss << "dptr=" << data->args.hipModuleGetGlobal.dptr__val;
- if (data->args.hipModuleGetGlobal.bytes == NULL) oss << ", bytes=NULL";
- else oss << ", bytes=" << data->args.hipModuleGetGlobal.bytes__val;
- oss << ", hmod=" << data->args.hipModuleGetGlobal.hmod;
- if (data->args.hipModuleGetGlobal.name == NULL) oss << ", name=NULL";
- else oss << ", name=" << data->args.hipModuleGetGlobal.name__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyHtoA:
- oss << "hipMemcpyHtoA(";
- if (data->args.hipMemcpyHtoA.dstArray == NULL) oss << "dstArray=NULL";
- else oss << "dstArray=" << data->args.hipMemcpyHtoA.dstArray__val;
- oss << ", dstOffset=" << data->args.hipMemcpyHtoA.dstOffset;
- oss << ", srcHost=" << data->args.hipMemcpyHtoA.srcHost;
- oss << ", count=" << data->args.hipMemcpyHtoA.count;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxCreate:
- oss << "hipCtxCreate(";
- if (data->args.hipCtxCreate.ctx == NULL) oss << "ctx=NULL";
- else oss << "ctx=" << data->args.hipCtxCreate.ctx__val;
- oss << ", flags=" << data->args.hipCtxCreate.flags;
- oss << ", device=" << data->args.hipCtxCreate.device;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy2D:
- oss << "hipMemcpy2D(";
- oss << "dst=" << data->args.hipMemcpy2D.dst;
- oss << ", dpitch=" << data->args.hipMemcpy2D.dpitch;
- oss << ", src=" << data->args.hipMemcpy2D.src;
- oss << ", spitch=" << data->args.hipMemcpy2D.spitch;
- oss << ", width=" << data->args.hipMemcpy2D.width;
- oss << ", height=" << data->args.hipMemcpy2D.height;
- oss << ", kind=" << data->args.hipMemcpy2D.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipIpcCloseMemHandle:
- oss << "hipIpcCloseMemHandle(";
- oss << "devPtr=" << data->args.hipIpcCloseMemHandle.devPtr;
- oss << ")";
- break;
- case HIP_API_ID_hipChooseDevice:
- oss << "hipChooseDevice(";
- if (data->args.hipChooseDevice.device == NULL) oss << "device=NULL";
- else oss << "device=" << data->args.hipChooseDevice.device__val;
- if (data->args.hipChooseDevice.prop == NULL) oss << ", prop=NULL";
- else oss << ", prop=" << data->args.hipChooseDevice.prop__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceSetSharedMemConfig:
- oss << "hipDeviceSetSharedMemConfig(";
- oss << "config=" << data->args.hipDeviceSetSharedMemConfig.config;
- oss << ")";
- break;
- case HIP_API_ID_hipMallocMipmappedArray:
- oss << "hipMallocMipmappedArray(";
- if (data->args.hipMallocMipmappedArray.mipmappedArray == NULL) oss << "mipmappedArray=NULL";
- else oss << "mipmappedArray=" << data->args.hipMallocMipmappedArray.mipmappedArray__val;
- if (data->args.hipMallocMipmappedArray.desc == NULL) oss << ", desc=NULL";
- else oss << ", desc=" << data->args.hipMallocMipmappedArray.desc__val;
- oss << ", extent=" << data->args.hipMallocMipmappedArray.extent;
- oss << ", numLevels=" << data->args.hipMallocMipmappedArray.numLevels;
- oss << ", flags=" << data->args.hipMallocMipmappedArray.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipSetupArgument:
- oss << "hipSetupArgument(";
- oss << "arg=" << data->args.hipSetupArgument.arg;
- oss << ", size=" << data->args.hipSetupArgument.size;
- oss << ", offset=" << data->args.hipSetupArgument.offset;
- oss << ")";
- break;
- case HIP_API_ID_hipIpcGetEventHandle:
- oss << "hipIpcGetEventHandle(";
- if (data->args.hipIpcGetEventHandle.handle == NULL) oss << "handle=NULL";
- else oss << "handle=" << data->args.hipIpcGetEventHandle.handle__val;
- oss << ", event=" << data->args.hipIpcGetEventHandle.event;
- oss << ")";
- break;
- case HIP_API_ID_hipFreeArray:
- oss << "hipFreeArray(";
- if (data->args.hipFreeArray.array == NULL) oss << "array=NULL";
- else oss << "array=" << data->args.hipFreeArray.array__val;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxSetCacheConfig:
- oss << "hipCtxSetCacheConfig(";
- oss << "cacheConfig=" << data->args.hipCtxSetCacheConfig.cacheConfig;
- oss << ")";
- break;
- case HIP_API_ID_hipFuncSetCacheConfig:
- oss << "hipFuncSetCacheConfig(";
- oss << "func=" << data->args.hipFuncSetCacheConfig.func;
- oss << ", config=" << data->args.hipFuncSetCacheConfig.config;
- oss << ")";
- break;
- case HIP_API_ID_hipLaunchKernel:
- oss << "hipLaunchKernel(";
- oss << "function_address=" << data->args.hipLaunchKernel.function_address;
- oss << ", numBlocks=" << data->args.hipLaunchKernel.numBlocks;
- oss << ", dimBlocks=" << data->args.hipLaunchKernel.dimBlocks;
- if (data->args.hipLaunchKernel.args == NULL) oss << ", args=NULL";
- else oss << ", args=" << data->args.hipLaunchKernel.args__val;
- oss << ", sharedMemBytes=" << data->args.hipLaunchKernel.sharedMemBytes;
- oss << ", stream=" << data->args.hipLaunchKernel.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
- oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(";
- if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL";
- else oss << "numBlocks=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val;
- oss << ", f=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f;
- oss << ", blockSize=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize;
- oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk;
- oss << ", flags=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleGetTexRef:
- oss << "hipModuleGetTexRef(";
- if (data->args.hipModuleGetTexRef.texRef == NULL) oss << "texRef=NULL";
- else oss << "texRef=" << (void*)data->args.hipModuleGetTexRef.texRef__val;
- oss << ", hmod=" << data->args.hipModuleGetTexRef.hmod;
- if (data->args.hipModuleGetTexRef.name == NULL) oss << ", name=NULL";
- else oss << ", name=" << data->args.hipModuleGetTexRef.name__val;
- oss << ")";
- break;
- case HIP_API_ID_hipFuncSetAttribute:
- oss << "hipFuncSetAttribute(";
- oss << "func=" << data->args.hipFuncSetAttribute.func;
- oss << ", attr=" << data->args.hipFuncSetAttribute.attr;
- oss << ", value=" << data->args.hipFuncSetAttribute.value;
- oss << ")";
- break;
- case HIP_API_ID_hipEventElapsedTime:
- oss << "hipEventElapsedTime(";
- if (data->args.hipEventElapsedTime.ms == NULL) oss << "ms=NULL";
- else oss << "ms=" << data->args.hipEventElapsedTime.ms__val;
- oss << ", start=" << data->args.hipEventElapsedTime.start;
- oss << ", stop=" << data->args.hipEventElapsedTime.stop;
- oss << ")";
- break;
- case HIP_API_ID_hipConfigureCall:
- oss << "hipConfigureCall(";
- oss << "gridDim=" << data->args.hipConfigureCall.gridDim;
- oss << ", blockDim=" << data->args.hipConfigureCall.blockDim;
- oss << ", sharedMem=" << data->args.hipConfigureCall.sharedMem;
- oss << ", stream=" << data->args.hipConfigureCall.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipMemAdvise:
- oss << "hipMemAdvise(";
- oss << "dev_ptr=" << data->args.hipMemAdvise.dev_ptr;
- oss << ", count=" << data->args.hipMemAdvise.count;
- oss << ", advice=" << data->args.hipMemAdvise.advice;
- oss << ", device=" << data->args.hipMemAdvise.device;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy3DAsync:
- oss << "hipMemcpy3DAsync(";
- if (data->args.hipMemcpy3DAsync.p == NULL) oss << "p=NULL";
- else oss << "p=" << data->args.hipMemcpy3DAsync.p__val;
- oss << ", stream=" << data->args.hipMemcpy3DAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipEventDestroy:
- oss << "hipEventDestroy(";
- oss << "event=" << data->args.hipEventDestroy.event;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxPopCurrent:
- oss << "hipCtxPopCurrent(";
- if (data->args.hipCtxPopCurrent.ctx == NULL) oss << "ctx=NULL";
- else oss << "ctx=" << data->args.hipCtxPopCurrent.ctx__val;
- oss << ")";
- break;
- case HIP_API_ID_hipGetSymbolAddress:
- oss << "hipGetSymbolAddress(";
- if (data->args.hipGetSymbolAddress.devPtr == NULL) oss << "devPtr=NULL";
- else oss << "devPtr=" << data->args.hipGetSymbolAddress.devPtr__val;
- oss << ", symbol=" << data->args.hipGetSymbolAddress.symbol;
- oss << ")";
- break;
- case HIP_API_ID_hipHostGetFlags:
- oss << "hipHostGetFlags(";
- if (data->args.hipHostGetFlags.flagsPtr == NULL) oss << "flagsPtr=NULL";
- else oss << "flagsPtr=" << data->args.hipHostGetFlags.flagsPtr__val;
- oss << ", hostPtr=" << data->args.hipHostGetFlags.hostPtr;
- oss << ")";
- break;
- case HIP_API_ID_hipHostMalloc:
- oss << "hipHostMalloc(";
- if (data->args.hipHostMalloc.ptr == NULL) oss << "ptr=NULL";
- else oss << "ptr=" << data->args.hipHostMalloc.ptr__val;
- oss << ", size=" << data->args.hipHostMalloc.size;
- oss << ", flags=" << data->args.hipHostMalloc.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxSetSharedMemConfig:
- oss << "hipCtxSetSharedMemConfig(";
- oss << "config=" << data->args.hipCtxSetSharedMemConfig.config;
- oss << ")";
- break;
- case HIP_API_ID_hipFreeMipmappedArray:
- oss << "hipFreeMipmappedArray(";
- oss << "mipmappedArray=" << data->args.hipFreeMipmappedArray.mipmappedArray;
- oss << ")";
- break;
- case HIP_API_ID_hipMemGetInfo:
- oss << "hipMemGetInfo(";
- if (data->args.hipMemGetInfo.free == NULL) oss << "free=NULL";
- else oss << "free=" << data->args.hipMemGetInfo.free__val;
- if (data->args.hipMemGetInfo.total == NULL) oss << ", total=NULL";
- else oss << ", total=" << data->args.hipMemGetInfo.total__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceReset:
- oss << "hipDeviceReset(";
- oss << ")";
- break;
- case HIP_API_ID_hipMemset:
- oss << "hipMemset(";
- oss << "dst=" << data->args.hipMemset.dst;
- oss << ", value=" << data->args.hipMemset.value;
- oss << ", sizeBytes=" << data->args.hipMemset.sizeBytes;
- oss << ")";
- break;
- case HIP_API_ID_hipMemsetD8:
- oss << "hipMemsetD8(";
- oss << "dest=" << data->args.hipMemsetD8.dest;
- oss << ", value=" << data->args.hipMemsetD8.value;
- oss << ", count=" << data->args.hipMemsetD8.count;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyParam2DAsync:
- oss << "hipMemcpyParam2DAsync(";
- if (data->args.hipMemcpyParam2DAsync.pCopy == NULL) oss << "pCopy=NULL";
- else oss << "pCopy=" << data->args.hipMemcpyParam2DAsync.pCopy__val;
- oss << ", stream=" << data->args.hipMemcpyParam2DAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipHostRegister:
- oss << "hipHostRegister(";
- oss << "hostPtr=" << data->args.hipHostRegister.hostPtr;
- oss << ", sizeBytes=" << data->args.hipHostRegister.sizeBytes;
- oss << ", flags=" << data->args.hipHostRegister.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipDriverGetVersion:
- oss << "hipDriverGetVersion(";
- if (data->args.hipDriverGetVersion.driverVersion == NULL) oss << "driverVersion=NULL";
- else oss << "driverVersion=" << data->args.hipDriverGetVersion.driverVersion__val;
- oss << ")";
- break;
- case HIP_API_ID_hipArray3DCreate:
- oss << "hipArray3DCreate(";
- if (data->args.hipArray3DCreate.array == NULL) oss << "array=NULL";
- else oss << "array=" << (void*)data->args.hipArray3DCreate.array__val;
- if (data->args.hipArray3DCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL";
- else oss << ", pAllocateArray=" << data->args.hipArray3DCreate.pAllocateArray__val;
- oss << ")";
- break;
- case HIP_API_ID_hipIpcOpenMemHandle:
- oss << "hipIpcOpenMemHandle(";
- if (data->args.hipIpcOpenMemHandle.devPtr == NULL) oss << "devPtr=NULL";
- else oss << "devPtr=" << data->args.hipIpcOpenMemHandle.devPtr__val;
- oss << ", handle=" << data->args.hipIpcOpenMemHandle.handle;
- oss << ", flags=" << data->args.hipIpcOpenMemHandle.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipGetLastError:
- oss << "hipGetLastError(";
- oss << ")";
- break;
- case HIP_API_ID_hipGetDeviceFlags:
- oss << "hipGetDeviceFlags(";
- if (data->args.hipGetDeviceFlags.flags == NULL) oss << "flags=NULL";
- else oss << "flags=" << data->args.hipGetDeviceFlags.flags__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetSharedMemConfig:
- oss << "hipDeviceGetSharedMemConfig(";
- if (data->args.hipDeviceGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL";
- else oss << "pConfig=" << data->args.hipDeviceGetSharedMemConfig.pConfig__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDrvMemcpy3D:
- oss << "hipDrvMemcpy3D(";
- if (data->args.hipDrvMemcpy3D.pCopy == NULL) oss << "pCopy=NULL";
- else oss << "pCopy=" << data->args.hipDrvMemcpy3D.pCopy__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy2DFromArray:
- oss << "hipMemcpy2DFromArray(";
- oss << "dst=" << data->args.hipMemcpy2DFromArray.dst;
- oss << ", dpitch=" << data->args.hipMemcpy2DFromArray.dpitch;
- oss << ", src=" << data->args.hipMemcpy2DFromArray.src;
- oss << ", wOffset=" << data->args.hipMemcpy2DFromArray.wOffset;
- oss << ", hOffset=" << data->args.hipMemcpy2DFromArray.hOffset;
- oss << ", width=" << data->args.hipMemcpy2DFromArray.width;
- oss << ", height=" << data->args.hipMemcpy2DFromArray.height;
- oss << ", kind=" << data->args.hipMemcpy2DFromArray.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
- oss << "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(";
- if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL";
- else oss << "numBlocks=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val;
- oss << ", f=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f;
- oss << ", blockSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize;
- oss << ", dynamicSMemSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize;
- oss << ", flags=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipSetDeviceFlags:
- oss << "hipSetDeviceFlags(";
- oss << "flags=" << data->args.hipSetDeviceFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipHccModuleLaunchKernel:
- oss << "hipHccModuleLaunchKernel(";
- oss << "f=" << data->args.hipHccModuleLaunchKernel.f;
- oss << ", globalWorkSizeX=" << data->args.hipHccModuleLaunchKernel.globalWorkSizeX;
- oss << ", globalWorkSizeY=" << data->args.hipHccModuleLaunchKernel.globalWorkSizeY;
- oss << ", globalWorkSizeZ=" << data->args.hipHccModuleLaunchKernel.globalWorkSizeZ;
- oss << ", blockDimX=" << data->args.hipHccModuleLaunchKernel.blockDimX;
- oss << ", blockDimY=" << data->args.hipHccModuleLaunchKernel.blockDimY;
- oss << ", blockDimZ=" << data->args.hipHccModuleLaunchKernel.blockDimZ;
- oss << ", sharedMemBytes=" << data->args.hipHccModuleLaunchKernel.sharedMemBytes;
- oss << ", hStream=" << data->args.hipHccModuleLaunchKernel.hStream;
- if (data->args.hipHccModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
- else oss << ", kernelParams=" << data->args.hipHccModuleLaunchKernel.kernelParams__val;
- if (data->args.hipHccModuleLaunchKernel.extra == NULL) oss << ", extra=NULL";
- else oss << ", extra=" << data->args.hipHccModuleLaunchKernel.extra__val;
- oss << ", startEvent=" << data->args.hipHccModuleLaunchKernel.startEvent;
- oss << ", stopEvent=" << data->args.hipHccModuleLaunchKernel.stopEvent;
- oss << ")";
- break;
- case HIP_API_ID_hipFree:
- oss << "hipFree(";
- oss << "ptr=" << data->args.hipFree.ptr;
- oss << ")";
- break;
- case HIP_API_ID_hipOccupancyMaxPotentialBlockSize:
- oss << "hipOccupancyMaxPotentialBlockSize(";
- if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL";
- else oss << "gridSize=" << data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val;
- if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL";
- else oss << ", blockSize=" << data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val;
- oss << ", f=" << data->args.hipOccupancyMaxPotentialBlockSize.f;
- oss << ", dynSharedMemPerBlk=" << data->args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk;
- oss << ", blockSizeLimit=" << data->args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetAttribute:
- oss << "hipDeviceGetAttribute(";
- if (data->args.hipDeviceGetAttribute.pi == NULL) oss << "pi=NULL";
- else oss << "pi=" << data->args.hipDeviceGetAttribute.pi__val;
- oss << ", attr=" << data->args.hipDeviceGetAttribute.attr;
- oss << ", deviceId=" << data->args.hipDeviceGetAttribute.deviceId;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceComputeCapability:
- oss << "hipDeviceComputeCapability(";
- if (data->args.hipDeviceComputeCapability.major == NULL) oss << "major=NULL";
- else oss << "major=" << data->args.hipDeviceComputeCapability.major__val;
- if (data->args.hipDeviceComputeCapability.minor == NULL) oss << ", minor=NULL";
- else oss << ", minor=" << data->args.hipDeviceComputeCapability.minor__val;
- oss << ", device=" << data->args.hipDeviceComputeCapability.device;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxDisablePeerAccess:
- oss << "hipCtxDisablePeerAccess(";
- oss << "peerCtx=" << data->args.hipCtxDisablePeerAccess.peerCtx;
- oss << ")";
- break;
- case HIP_API_ID_hipMallocManaged:
- oss << "hipMallocManaged(";
- if (data->args.hipMallocManaged.dev_ptr == NULL) oss << "dev_ptr=NULL";
- else oss << "dev_ptr=" << data->args.hipMallocManaged.dev_ptr__val;
- oss << ", size=" << data->args.hipMallocManaged.size;
- oss << ", flags=" << data->args.hipMallocManaged.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetByPCIBusId:
- oss << "hipDeviceGetByPCIBusId(";
- if (data->args.hipDeviceGetByPCIBusId.device == NULL) oss << "device=NULL";
- else oss << "device=" << data->args.hipDeviceGetByPCIBusId.device__val;
- if (data->args.hipDeviceGetByPCIBusId.pciBusId == NULL) oss << ", pciBusId=NULL";
- else oss << ", pciBusId=" << data->args.hipDeviceGetByPCIBusId.pciBusId__val;
- oss << ")";
- break;
- case HIP_API_ID_hipIpcGetMemHandle:
- oss << "hipIpcGetMemHandle(";
- if (data->args.hipIpcGetMemHandle.handle == NULL) oss << "handle=NULL";
- else oss << "handle=" << data->args.hipIpcGetMemHandle.handle__val;
- oss << ", devPtr=" << data->args.hipIpcGetMemHandle.devPtr;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyHtoDAsync:
- oss << "hipMemcpyHtoDAsync(";
- oss << "dst=" << data->args.hipMemcpyHtoDAsync.dst;
- oss << ", src=" << data->args.hipMemcpyHtoDAsync.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyHtoDAsync.sizeBytes;
- oss << ", stream=" << data->args.hipMemcpyHtoDAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxGetDevice:
- oss << "hipCtxGetDevice(";
- if (data->args.hipCtxGetDevice.device == NULL) oss << "device=NULL";
- else oss << "device=" << data->args.hipCtxGetDevice.device__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyDtoD:
- oss << "hipMemcpyDtoD(";
- oss << "dst=" << data->args.hipMemcpyDtoD.dst;
- oss << ", src=" << data->args.hipMemcpyDtoD.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyDtoD.sizeBytes;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleLoadData:
- oss << "hipModuleLoadData(";
- if (data->args.hipModuleLoadData.module == NULL) oss << "module=NULL";
- else oss << "module=" << data->args.hipModuleLoadData.module__val;
- oss << ", image=" << data->args.hipModuleLoadData.image;
- oss << ")";
- break;
- case HIP_API_ID_hipDevicePrimaryCtxRelease:
- oss << "hipDevicePrimaryCtxRelease(";
- oss << "dev=" << data->args.hipDevicePrimaryCtxRelease.dev;
- oss << ")";
- break;
- case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor:
- oss << "hipOccupancyMaxActiveBlocksPerMultiprocessor(";
- if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL";
- else oss << "numBlocks=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val;
- oss << ", f=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f;
- oss << ", blockSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize;
- oss << ", dynamicSMemSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxSetCurrent:
- oss << "hipCtxSetCurrent(";
- oss << "ctx=" << data->args.hipCtxSetCurrent.ctx;
- oss << ")";
- break;
- case HIP_API_ID_hipGetErrorString:
- oss << "hipGetErrorString(";
- oss << ")";
- break;
- case HIP_API_ID_hipStreamCreate:
- oss << "hipStreamCreate(";
- if (data->args.hipStreamCreate.stream == NULL) oss << "stream=NULL";
- else oss << "stream=" << data->args.hipStreamCreate.stream__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDevicePrimaryCtxRetain:
- oss << "hipDevicePrimaryCtxRetain(";
- if (data->args.hipDevicePrimaryCtxRetain.pctx == NULL) oss << "pctx=NULL";
- else oss << "pctx=" << data->args.hipDevicePrimaryCtxRetain.pctx__val;
- oss << ", dev=" << data->args.hipDevicePrimaryCtxRetain.dev;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGet:
- oss << "hipDeviceGet(";
- if (data->args.hipDeviceGet.device == NULL) oss << "device=NULL";
- else oss << "device=" << data->args.hipDeviceGet.device__val;
- oss << ", ordinal=" << data->args.hipDeviceGet.ordinal;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamCreateWithFlags:
- oss << "hipStreamCreateWithFlags(";
- if (data->args.hipStreamCreateWithFlags.stream == NULL) oss << "stream=NULL";
- else oss << "stream=" << data->args.hipStreamCreateWithFlags.stream__val;
- oss << ", flags=" << data->args.hipStreamCreateWithFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyFromArray:
- oss << "hipMemcpyFromArray(";
- oss << "dst=" << data->args.hipMemcpyFromArray.dst;
- oss << ", srcArray=" << data->args.hipMemcpyFromArray.srcArray;
- oss << ", wOffset=" << data->args.hipMemcpyFromArray.wOffset;
- oss << ", hOffset=" << data->args.hipMemcpyFromArray.hOffset;
- oss << ", count=" << data->args.hipMemcpyFromArray.count;
- oss << ", kind=" << data->args.hipMemcpyFromArray.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy2DAsync:
- oss << "hipMemcpy2DAsync(";
- oss << "dst=" << data->args.hipMemcpy2DAsync.dst;
- oss << ", dpitch=" << data->args.hipMemcpy2DAsync.dpitch;
- oss << ", src=" << data->args.hipMemcpy2DAsync.src;
- oss << ", spitch=" << data->args.hipMemcpy2DAsync.spitch;
- oss << ", width=" << data->args.hipMemcpy2DAsync.width;
- oss << ", height=" << data->args.hipMemcpy2DAsync.height;
- oss << ", kind=" << data->args.hipMemcpy2DAsync.kind;
- oss << ", stream=" << data->args.hipMemcpy2DAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipFuncGetAttributes:
- oss << "hipFuncGetAttributes(";
- if (data->args.hipFuncGetAttributes.attr == NULL) oss << "attr=NULL";
- else oss << "attr=" << data->args.hipFuncGetAttributes.attr__val;
- oss << ", func=" << data->args.hipFuncGetAttributes.func;
- oss << ")";
- break;
- case HIP_API_ID_hipGetSymbolSize:
- oss << "hipGetSymbolSize(";
- if (data->args.hipGetSymbolSize.size == NULL) oss << "size=NULL";
- else oss << "size=" << data->args.hipGetSymbolSize.size__val;
- oss << ", symbol=" << data->args.hipGetSymbolSize.symbol;
- oss << ")";
- break;
- case HIP_API_ID_hipHostFree:
- oss << "hipHostFree(";
- oss << "ptr=" << data->args.hipHostFree.ptr;
- oss << ")";
- break;
- case HIP_API_ID_hipEventCreateWithFlags:
- oss << "hipEventCreateWithFlags(";
- if (data->args.hipEventCreateWithFlags.event == NULL) oss << "event=NULL";
- else oss << "event=" << data->args.hipEventCreateWithFlags.event__val;
- oss << ", flags=" << data->args.hipEventCreateWithFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamQuery:
- oss << "hipStreamQuery(";
- oss << "stream=" << data->args.hipStreamQuery.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy3D:
- oss << "hipMemcpy3D(";
- if (data->args.hipMemcpy3D.p == NULL) oss << "p=NULL";
- else oss << "p=" << data->args.hipMemcpy3D.p__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyToSymbol:
- oss << "hipMemcpyToSymbol(";
- oss << "symbol=" << data->args.hipMemcpyToSymbol.symbol;
- oss << ", src=" << data->args.hipMemcpyToSymbol.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyToSymbol.sizeBytes;
- oss << ", offset=" << data->args.hipMemcpyToSymbol.offset;
- oss << ", kind=" << data->args.hipMemcpyToSymbol.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy:
- oss << "hipMemcpy(";
- oss << "dst=" << data->args.hipMemcpy.dst;
- oss << ", src=" << data->args.hipMemcpy.src;
- oss << ", sizeBytes=" << data->args.hipMemcpy.sizeBytes;
- oss << ", kind=" << data->args.hipMemcpy.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipPeekAtLastError:
- oss << "hipPeekAtLastError(";
- oss << ")";
- break;
- case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice:
- oss << "hipExtLaunchMultiKernelMultiDevice(";
- if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL";
- else oss << "launchParamsList=" << data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val;
- oss << ", numDevices=" << data->args.hipExtLaunchMultiKernelMultiDevice.numDevices;
- oss << ", flags=" << data->args.hipExtLaunchMultiKernelMultiDevice.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipHostAlloc:
- oss << "hipHostAlloc(";
- if (data->args.hipHostAlloc.ptr == NULL) oss << "ptr=NULL";
- else oss << "ptr=" << data->args.hipHostAlloc.ptr__val;
- oss << ", size=" << data->args.hipHostAlloc.size;
- oss << ", flags=" << data->args.hipHostAlloc.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipStreamAddCallback:
- oss << "hipStreamAddCallback(";
- oss << "stream=" << data->args.hipStreamAddCallback.stream;
- oss << ", callback=" << data->args.hipStreamAddCallback.callback;
- oss << ", userData=" << data->args.hipStreamAddCallback.userData;
- oss << ", flags=" << data->args.hipStreamAddCallback.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyToArray:
- oss << "hipMemcpyToArray(";
- if (data->args.hipMemcpyToArray.dst == NULL) oss << "dst=NULL";
- else oss << "dst=" << data->args.hipMemcpyToArray.dst__val;
- oss << ", wOffset=" << data->args.hipMemcpyToArray.wOffset;
- oss << ", hOffset=" << data->args.hipMemcpyToArray.hOffset;
- oss << ", src=" << data->args.hipMemcpyToArray.src;
- oss << ", count=" << data->args.hipMemcpyToArray.count;
- oss << ", kind=" << data->args.hipMemcpyToArray.kind;
- oss << ")";
- break;
- case HIP_API_ID_hipMemsetD32:
- oss << "hipMemsetD32(";
- oss << "dest=" << data->args.hipMemsetD32.dest;
- oss << ", value=" << data->args.hipMemsetD32.value;
- oss << ", count=" << data->args.hipMemsetD32.count;
- oss << ")";
- break;
- case HIP_API_ID_hipExtModuleLaunchKernel:
- oss << "hipExtModuleLaunchKernel(";
- oss << "f=" << data->args.hipExtModuleLaunchKernel.f;
- oss << ", globalWorkSizeX=" << data->args.hipExtModuleLaunchKernel.globalWorkSizeX;
- oss << ", globalWorkSizeY=" << data->args.hipExtModuleLaunchKernel.globalWorkSizeY;
- oss << ", globalWorkSizeZ=" << data->args.hipExtModuleLaunchKernel.globalWorkSizeZ;
- oss << ", localWorkSizeX=" << data->args.hipExtModuleLaunchKernel.localWorkSizeX;
- oss << ", localWorkSizeY=" << data->args.hipExtModuleLaunchKernel.localWorkSizeY;
- oss << ", localWorkSizeZ=" << data->args.hipExtModuleLaunchKernel.localWorkSizeZ;
- oss << ", sharedMemBytes=" << data->args.hipExtModuleLaunchKernel.sharedMemBytes;
- oss << ", hStream=" << data->args.hipExtModuleLaunchKernel.hStream;
- if (data->args.hipExtModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
- else oss << ", kernelParams=" << data->args.hipExtModuleLaunchKernel.kernelParams__val;
- if (data->args.hipExtModuleLaunchKernel.extra == NULL) oss << ", extra=NULL";
- else oss << ", extra=" << data->args.hipExtModuleLaunchKernel.extra__val;
- oss << ", startEvent=" << data->args.hipExtModuleLaunchKernel.startEvent;
- oss << ", stopEvent=" << data->args.hipExtModuleLaunchKernel.stopEvent;
- oss << ", flags=" << data->args.hipExtModuleLaunchKernel.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceSynchronize:
- oss << "hipDeviceSynchronize(";
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetCacheConfig:
- oss << "hipDeviceGetCacheConfig(";
- if (data->args.hipDeviceGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL";
- else oss << "cacheConfig=" << data->args.hipDeviceGetCacheConfig.cacheConfig__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMalloc3D:
- oss << "hipMalloc3D(";
- if (data->args.hipMalloc3D.pitchedDevPtr == NULL) oss << "pitchedDevPtr=NULL";
- else oss << "pitchedDevPtr=" << data->args.hipMalloc3D.pitchedDevPtr__val;
- oss << ", extent=" << data->args.hipMalloc3D.extent;
- oss << ")";
- break;
- case HIP_API_ID_hipPointerGetAttributes:
- oss << "hipPointerGetAttributes(";
- if (data->args.hipPointerGetAttributes.attributes == NULL) oss << "attributes=NULL";
- else oss << "attributes=" << data->args.hipPointerGetAttributes.attributes__val;
- oss << ", ptr=" << data->args.hipPointerGetAttributes.ptr;
- oss << ")";
- break;
- case HIP_API_ID_hipMemsetAsync:
- oss << "hipMemsetAsync(";
- oss << "dst=" << data->args.hipMemsetAsync.dst;
- oss << ", value=" << data->args.hipMemsetAsync.value;
- oss << ", sizeBytes=" << data->args.hipMemsetAsync.sizeBytes;
- oss << ", stream=" << data->args.hipMemsetAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetName:
- oss << "hipDeviceGetName(";
- if (data->args.hipDeviceGetName.name == NULL) oss << "name=NULL";
- else oss << "name=" << data->args.hipDeviceGetName.name__val;
- oss << ", len=" << data->args.hipDeviceGetName.len;
- oss << ", device=" << data->args.hipDeviceGetName.device;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags:
- oss << "hipModuleOccupancyMaxPotentialBlockSizeWithFlags(";
- if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize == NULL) oss << "gridSize=NULL";
- else oss << "gridSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val;
- if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize == NULL) oss << ", blockSize=NULL";
- else oss << ", blockSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val;
- oss << ", f=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f;
- oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk;
- oss << ", blockSizeLimit=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit;
- oss << ", flags=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxPushCurrent:
- oss << "hipCtxPushCurrent(";
- oss << "ctx=" << data->args.hipCtxPushCurrent.ctx;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyPeer:
- oss << "hipMemcpyPeer(";
- oss << "dst=" << data->args.hipMemcpyPeer.dst;
- oss << ", dstDeviceId=" << data->args.hipMemcpyPeer.dstDeviceId;
- oss << ", src=" << data->args.hipMemcpyPeer.src;
- oss << ", srcDeviceId=" << data->args.hipMemcpyPeer.srcDeviceId;
- oss << ", sizeBytes=" << data->args.hipMemcpyPeer.sizeBytes;
- oss << ")";
- break;
- case HIP_API_ID_hipEventSynchronize:
- oss << "hipEventSynchronize(";
- oss << "event=" << data->args.hipEventSynchronize.event;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyDtoDAsync:
- oss << "hipMemcpyDtoDAsync(";
- oss << "dst=" << data->args.hipMemcpyDtoDAsync.dst;
- oss << ", src=" << data->args.hipMemcpyDtoDAsync.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyDtoDAsync.sizeBytes;
- oss << ", stream=" << data->args.hipMemcpyDtoDAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipProfilerStart:
- oss << "hipProfilerStart(";
- oss << ")";
- break;
- case HIP_API_ID_hipExtMallocWithFlags:
- oss << "hipExtMallocWithFlags(";
- if (data->args.hipExtMallocWithFlags.ptr == NULL) oss << "ptr=NULL";
- else oss << "ptr=" << data->args.hipExtMallocWithFlags.ptr__val;
- oss << ", sizeBytes=" << data->args.hipExtMallocWithFlags.sizeBytes;
- oss << ", flags=" << data->args.hipExtMallocWithFlags.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipCtxEnablePeerAccess:
- oss << "hipCtxEnablePeerAccess(";
- oss << "peerCtx=" << data->args.hipCtxEnablePeerAccess.peerCtx;
- oss << ", flags=" << data->args.hipCtxEnablePeerAccess.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMemAllocHost:
- oss << "hipMemAllocHost(";
- if (data->args.hipMemAllocHost.ptr == NULL) oss << "ptr=NULL";
- else oss << "ptr=" << data->args.hipMemAllocHost.ptr__val;
- oss << ", size=" << data->args.hipMemAllocHost.size;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyDtoHAsync:
- oss << "hipMemcpyDtoHAsync(";
- oss << "dst=" << data->args.hipMemcpyDtoHAsync.dst;
- oss << ", src=" << data->args.hipMemcpyDtoHAsync.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyDtoHAsync.sizeBytes;
- oss << ", stream=" << data->args.hipMemcpyDtoHAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleLaunchKernel:
- oss << "hipModuleLaunchKernel(";
- oss << "f=" << data->args.hipModuleLaunchKernel.f;
- oss << ", gridDimX=" << data->args.hipModuleLaunchKernel.gridDimX;
- oss << ", gridDimY=" << data->args.hipModuleLaunchKernel.gridDimY;
- oss << ", gridDimZ=" << data->args.hipModuleLaunchKernel.gridDimZ;
- oss << ", blockDimX=" << data->args.hipModuleLaunchKernel.blockDimX;
- oss << ", blockDimY=" << data->args.hipModuleLaunchKernel.blockDimY;
- oss << ", blockDimZ=" << data->args.hipModuleLaunchKernel.blockDimZ;
- oss << ", sharedMemBytes=" << data->args.hipModuleLaunchKernel.sharedMemBytes;
- oss << ", stream=" << data->args.hipModuleLaunchKernel.stream;
- if (data->args.hipModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
- else oss << ", kernelParams=" << data->args.hipModuleLaunchKernel.kernelParams__val;
- if (data->args.hipModuleLaunchKernel.extra == NULL) oss << ", extra=NULL";
- else oss << ", extra=" << data->args.hipModuleLaunchKernel.extra__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemAllocPitch:
- oss << "hipMemAllocPitch(";
- if (data->args.hipMemAllocPitch.dptr == NULL) oss << "dptr=NULL";
- else oss << "dptr=" << data->args.hipMemAllocPitch.dptr__val;
- if (data->args.hipMemAllocPitch.pitch == NULL) oss << ", pitch=NULL";
- else oss << ", pitch=" << data->args.hipMemAllocPitch.pitch__val;
- oss << ", widthInBytes=" << data->args.hipMemAllocPitch.widthInBytes;
- oss << ", height=" << data->args.hipMemAllocPitch.height;
- oss << ", elementSizeBytes=" << data->args.hipMemAllocPitch.elementSizeBytes;
- oss << ")";
- break;
- case HIP_API_ID_hipExtLaunchKernel:
- oss << "hipExtLaunchKernel(";
- oss << "function_address=" << data->args.hipExtLaunchKernel.function_address;
- oss << ", numBlocks=" << data->args.hipExtLaunchKernel.numBlocks;
- oss << ", dimBlocks=" << data->args.hipExtLaunchKernel.dimBlocks;
- if (data->args.hipExtLaunchKernel.args == NULL) oss << ", args=NULL";
- else oss << ", args=" << data->args.hipExtLaunchKernel.args__val;
- oss << ", sharedMemBytes=" << data->args.hipExtLaunchKernel.sharedMemBytes;
- oss << ", stream=" << data->args.hipExtLaunchKernel.stream;
- oss << ", startEvent=" << data->args.hipExtLaunchKernel.startEvent;
- oss << ", stopEvent=" << data->args.hipExtLaunchKernel.stopEvent;
- oss << ", flags=" << data->args.hipExtLaunchKernel.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpy2DFromArrayAsync:
- oss << "hipMemcpy2DFromArrayAsync(";
- oss << "dst=" << data->args.hipMemcpy2DFromArrayAsync.dst;
- oss << ", dpitch=" << data->args.hipMemcpy2DFromArrayAsync.dpitch;
- oss << ", src=" << data->args.hipMemcpy2DFromArrayAsync.src;
- oss << ", wOffset=" << data->args.hipMemcpy2DFromArrayAsync.wOffset;
- oss << ", hOffset=" << data->args.hipMemcpy2DFromArrayAsync.hOffset;
- oss << ", width=" << data->args.hipMemcpy2DFromArrayAsync.width;
- oss << ", height=" << data->args.hipMemcpy2DFromArrayAsync.height;
- oss << ", kind=" << data->args.hipMemcpy2DFromArrayAsync.kind;
- oss << ", stream=" << data->args.hipMemcpy2DFromArrayAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetLimit:
- oss << "hipDeviceGetLimit(";
- if (data->args.hipDeviceGetLimit.pValue == NULL) oss << "pValue=NULL";
- else oss << "pValue=" << data->args.hipDeviceGetLimit.pValue__val;
- oss << ", limit=" << data->args.hipDeviceGetLimit.limit;
- oss << ")";
- break;
- case HIP_API_ID_hipModuleLoadDataEx:
- oss << "hipModuleLoadDataEx(";
- if (data->args.hipModuleLoadDataEx.module == NULL) oss << "module=NULL";
- else oss << "module=" << data->args.hipModuleLoadDataEx.module__val;
- oss << ", image=" << data->args.hipModuleLoadDataEx.image;
- oss << ", numOptions=" << data->args.hipModuleLoadDataEx.numOptions;
- if (data->args.hipModuleLoadDataEx.options == NULL) oss << ", options=NULL";
- else oss << ", options=" << data->args.hipModuleLoadDataEx.options__val;
- if (data->args.hipModuleLoadDataEx.optionsValues == NULL) oss << ", optionsValues=NULL";
- else oss << ", optionsValues=" << data->args.hipModuleLoadDataEx.optionsValues__val;
- oss << ")";
- break;
- case HIP_API_ID_hipRuntimeGetVersion:
- oss << "hipRuntimeGetVersion(";
- if (data->args.hipRuntimeGetVersion.runtimeVersion == NULL) oss << "runtimeVersion=NULL";
- else oss << "runtimeVersion=" << data->args.hipRuntimeGetVersion.runtimeVersion__val;
- oss << ")";
- break;
- case HIP_API_ID_hipMemRangeGetAttribute:
- oss << "hipMemRangeGetAttribute(";
- oss << "data=" << data->args.hipMemRangeGetAttribute.data;
- oss << ", data_size=" << data->args.hipMemRangeGetAttribute.data_size;
- oss << ", attribute=" << data->args.hipMemRangeGetAttribute.attribute;
- oss << ", dev_ptr=" << data->args.hipMemRangeGetAttribute.dev_ptr;
- oss << ", count=" << data->args.hipMemRangeGetAttribute.count;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceGetP2PAttribute:
- oss << "hipDeviceGetP2PAttribute(";
- if (data->args.hipDeviceGetP2PAttribute.value == NULL) oss << "value=NULL";
- else oss << "value=" << data->args.hipDeviceGetP2PAttribute.value__val;
- oss << ", attr=" << data->args.hipDeviceGetP2PAttribute.attr;
- oss << ", srcDevice=" << data->args.hipDeviceGetP2PAttribute.srcDevice;
- oss << ", dstDevice=" << data->args.hipDeviceGetP2PAttribute.dstDevice;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyPeerAsync:
- oss << "hipMemcpyPeerAsync(";
- oss << "dst=" << data->args.hipMemcpyPeerAsync.dst;
- oss << ", dstDeviceId=" << data->args.hipMemcpyPeerAsync.dstDeviceId;
- oss << ", src=" << data->args.hipMemcpyPeerAsync.src;
- oss << ", srcDevice=" << data->args.hipMemcpyPeerAsync.srcDevice;
- oss << ", sizeBytes=" << data->args.hipMemcpyPeerAsync.sizeBytes;
- oss << ", stream=" << data->args.hipMemcpyPeerAsync.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipGetDeviceProperties:
- oss << "hipGetDeviceProperties(";
- if (data->args.hipGetDeviceProperties.props == NULL) oss << "props=NULL";
- else oss << "props=" << data->args.hipGetDeviceProperties.props__val;
- oss << ", device=" << data->args.hipGetDeviceProperties.device;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyDtoH:
- oss << "hipMemcpyDtoH(";
- oss << "dst=" << data->args.hipMemcpyDtoH.dst;
- oss << ", src=" << data->args.hipMemcpyDtoH.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyDtoH.sizeBytes;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyWithStream:
- oss << "hipMemcpyWithStream(";
- oss << "dst=" << data->args.hipMemcpyWithStream.dst;
- oss << ", src=" << data->args.hipMemcpyWithStream.src;
- oss << ", sizeBytes=" << data->args.hipMemcpyWithStream.sizeBytes;
- oss << ", kind=" << data->args.hipMemcpyWithStream.kind;
- oss << ", stream=" << data->args.hipMemcpyWithStream.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipDeviceTotalMem:
- oss << "hipDeviceTotalMem(";
- if (data->args.hipDeviceTotalMem.bytes == NULL) oss << "bytes=NULL";
- else oss << "bytes=" << data->args.hipDeviceTotalMem.bytes__val;
- oss << ", device=" << data->args.hipDeviceTotalMem.device;
- oss << ")";
- break;
- case HIP_API_ID_hipHostGetDevicePointer:
- oss << "hipHostGetDevicePointer(";
- if (data->args.hipHostGetDevicePointer.devPtr == NULL) oss << "devPtr=NULL";
- else oss << "devPtr=" << data->args.hipHostGetDevicePointer.devPtr__val;
- oss << ", hstPtr=" << data->args.hipHostGetDevicePointer.hstPtr;
- oss << ", flags=" << data->args.hipHostGetDevicePointer.flags;
- oss << ")";
- break;
- case HIP_API_ID_hipMemRangeGetAttributes:
- oss << "hipMemRangeGetAttributes(";
- if (data->args.hipMemRangeGetAttributes.data == NULL) oss << "data=NULL";
- else oss << "data=" << data->args.hipMemRangeGetAttributes.data__val;
- if (data->args.hipMemRangeGetAttributes.data_sizes == NULL) oss << ", data_sizes=NULL";
- else oss << ", data_sizes=" << data->args.hipMemRangeGetAttributes.data_sizes__val;
- if (data->args.hipMemRangeGetAttributes.attributes == NULL) oss << ", attributes=NULL";
- else oss << ", attributes=" << data->args.hipMemRangeGetAttributes.attributes__val;
- oss << ", num_attributes=" << data->args.hipMemRangeGetAttributes.num_attributes;
- oss << ", dev_ptr=" << data->args.hipMemRangeGetAttributes.dev_ptr;
- oss << ", count=" << data->args.hipMemRangeGetAttributes.count;
- oss << ")";
- break;
- case HIP_API_ID_hipMemcpyParam2D:
- oss << "hipMemcpyParam2D(";
- if (data->args.hipMemcpyParam2D.pCopy == NULL) oss << "pCopy=NULL";
- else oss << "pCopy=" << data->args.hipMemcpyParam2D.pCopy__val;
- oss << ")";
- break;
- case HIP_API_ID_hipDevicePrimaryCtxReset:
- oss << "hipDevicePrimaryCtxReset(";
- oss << "dev=" << data->args.hipDevicePrimaryCtxReset.dev;
- oss << ")";
- break;
- case HIP_API_ID_hipGetMipmappedArrayLevel:
- oss << "hipGetMipmappedArrayLevel(";
- if (data->args.hipGetMipmappedArrayLevel.levelArray == NULL) oss << "levelArray=NULL";
- else oss << "levelArray=" << data->args.hipGetMipmappedArrayLevel.levelArray__val;
- oss << ", mipmappedArray=" << data->args.hipGetMipmappedArrayLevel.mipmappedArray;
- oss << ", level=" << data->args.hipGetMipmappedArrayLevel.level;
- oss << ")";
- break;
- case HIP_API_ID_hipMemsetD32Async:
- oss << "hipMemsetD32Async(";
- oss << "dst=" << data->args.hipMemsetD32Async.dst;
- oss << ", value=" << data->args.hipMemsetD32Async.value;
- oss << ", count=" << data->args.hipMemsetD32Async.count;
- oss << ", stream=" << data->args.hipMemsetD32Async.stream;
- oss << ")";
- break;
- case HIP_API_ID_hipGetDevice:
- oss << "hipGetDevice(";
- if (data->args.hipGetDevice.deviceId == NULL) oss << "deviceId=NULL";
- else oss << "deviceId=" << data->args.hipGetDevice.deviceId__val;
- oss << ")";
- break;
- case HIP_API_ID_hipGetDeviceCount:
- oss << "hipGetDeviceCount(";
- if (data->args.hipGetDeviceCount.count == NULL) oss << "count=NULL";
- else oss << "count=" << data->args.hipGetDeviceCount.count__val;
- oss << ")";
- break;
- case HIP_API_ID_hipIpcOpenEventHandle:
- oss << "hipIpcOpenEventHandle(";
- if (data->args.hipIpcOpenEventHandle.event == NULL) oss << "event=NULL";
- else oss << "event=" << data->args.hipIpcOpenEventHandle.event__val;
- oss << ", handle=" << data->args.hipIpcOpenEventHandle.handle;
- oss << ")";
- break;
- default: oss << "unknown";
- };
- return strdup(oss.str().c_str());
-}
-#endif // HIP_PROF_HIP_API_STRING
-#endif // _HIP_PROF_STR_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_runtime.h b/third_party/rocm/include/hip/hcc_detail/hip_runtime.h
deleted file mode 100644
index 5411bb3..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_runtime.h
+++ /dev/null
@@ -1,612 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/hip_runtime.h
- * @brief Contains definitions of APIs for HIP runtime.
- */
-
-//#pragma once
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
-
-#include <hip/hcc_detail/hip_common.h>
-
-//---
-// Top part of file can be compiled with any compiler
-
-//#include <cstring>
-#if __cplusplus
-#include <cmath>
-#include <cstdint>
-#else
-#include <math.h>
-#include <string.h>
-#include <stddef.h>
-#endif //__cplusplus
-
-// __hip_malloc is not working. Disable it by default.
-#ifndef __HIP_ENABLE_DEVICE_MALLOC__
-#define __HIP_ENABLE_DEVICE_MALLOC__ 0
-#endif
-
-#if __HCC_OR_HIP_CLANG__
-
-#if __HIP__
-#if !defined(__align__)
-#define __align__(x) __attribute__((aligned(x)))
-#endif
-#endif
-
-#define CUDA_SUCCESS hipSuccess
-
-#include <hip/hip_runtime_api.h>
-#endif // __HCC_OR_HIP_CLANG__
-
-#if __HCC__
-// define HIP_ENABLE_PRINTF to enable printf
-#ifdef HIP_ENABLE_PRINTF
-#define HCC_ENABLE_ACCELERATOR_PRINTF 1
-#endif
-
-//---
-// Remainder of this file only compiles with HCC
-#if defined __HCC__
-#include "grid_launch.h"
-#include "hc_printf.hpp"
-// TODO-HCC-GL - change this to typedef.
-// typedef grid_launch_parm hipLaunchParm ;
-
-#if GENERIC_GRID_LAUNCH == 0
-#define hipLaunchParm grid_launch_parm
-#else
-namespace hip_impl {
-struct Empty_launch_parm {};
-} // namespace hip_impl
-#define hipLaunchParm hip_impl::Empty_launch_parm
-#endif // GENERIC_GRID_LAUNCH
-
-#if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1
-#else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
-#error(HCC must support GRID_LAUNCH_20)
-#endif // GRID_LAUNCH_VERSION
-
-#endif // HCC
-
-#if GENERIC_GRID_LAUNCH == 1 && defined __HCC__
-#include "grid_launch_GGL.hpp"
-#endif // GENERIC_GRID_LAUNCH
-
-#endif // HCC
-
-#if __HCC_OR_HIP_CLANG__
-extern int HIP_TRACE_API;
-
-#ifdef __cplusplus
-#include <hip/hcc_detail/hip_ldg.h>
-#endif
-#include <hip/hcc_detail/hip_atomic.h>
-#include <hip/hcc_detail/host_defines.h>
-#include <hip/hcc_detail/device_functions.h>
-#include <hip/hcc_detail/surface_functions.h>
-#if __HCC__
- #include <hip/hcc_detail/math_functions.h>
- #include <hip/hcc_detail/texture_functions.h>
-#else
- #include <hip/hcc_detail/texture_fetch_functions.h>
- #include <hip/hcc_detail/texture_indirect_functions.h>
-#endif
-// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
-#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
-#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
-#endif
-
-// TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call.
-#if defined(__HCC__) && __HIP_DEVICE_COMPILE__ == 1
-#undef assert
-#define assert(COND) \
- { \
- if (!(COND)) { \
- abort(); \
- } \
- }
-#endif
-
-
-// Feature tests:
-#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
-// Device compile and not host compile:
-
-// 32-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
-#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
-#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
-#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
-#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
-
-// 64-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
-#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
-
-// Doubles
-#define __HIP_ARCH_HAS_DOUBLES__ (1)
-
-// warp cross-lane operations:
-#define __HIP_ARCH_HAS_WARP_VOTE__ (1)
-#define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
-#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
-#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
-
-// sync
-#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
-#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
-
-// misc
-#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
-#define __HIP_ARCH_HAS_3DGRID__ (1)
-#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
-
-#endif /* Device feature flags */
-
-
-#define launch_bounds_impl0(requiredMaxThreadsPerBlock) \
- __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
-#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \
- __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
- amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
-#define select_impl_(_1, _2, impl_, ...) impl_
-#define __launch_bounds__(...) \
- select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
-
-// Detect if we are compiling C++ mode or C mode
-#if defined(__cplusplus)
-#define __HCC_CPP__
-#elif defined(__STDC_VERSION__)
-#define __HCC_C__
-#endif
-
-__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
-
-#if __HIP_ARCH_GFX701__ == 0
-
-__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
-__device__ float __hip_ds_bpermutef(int index, float src);
-__device__ unsigned __hip_ds_permute(int index, unsigned src);
-__device__ float __hip_ds_permutef(int index, float src);
-
-template <int pattern>
-__device__ unsigned __hip_ds_swizzle_N(unsigned int src);
-template <int pattern>
-__device__ float __hip_ds_swizzlef_N(float src);
-
-template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
-__device__ int __hip_move_dpp_N(int src);
-
-#endif //__HIP_ARCH_GFX803__ == 1
-
-#endif // __HCC_OR_HIP_CLANG__
-
-#if defined __HCC__
-
-namespace hip_impl {
- struct GroupId {
- using R = decltype(hc_get_group_id(0));
-
- __device__
- R operator()(std::uint32_t x) const noexcept { return hc_get_group_id(x); }
- };
- struct GroupSize {
- using R = decltype(hc_get_group_size(0));
-
- __device__
- R operator()(std::uint32_t x) const noexcept {
- return hc_get_group_size(x);
- }
- };
- struct NumGroups {
- using R = decltype(hc_get_num_groups(0));
-
- __device__
- R operator()(std::uint32_t x) const noexcept {
- return hc_get_num_groups(x);
- }
- };
- struct WorkitemId {
- using R = decltype(hc_get_workitem_id(0));
-
- __device__
- R operator()(std::uint32_t x) const noexcept {
- return hc_get_workitem_id(x);
- }
- };
-} // Namespace hip_impl.
-
-template <typename F>
-struct Coordinates {
- using R = decltype(F{}(0));
-
- struct X { __device__ operator R() const noexcept { return F{}(0); } };
- struct Y { __device__ operator R() const noexcept { return F{}(1); } };
- struct Z { __device__ operator R() const noexcept { return F{}(2); } };
-
- static constexpr X x{};
- static constexpr Y y{};
- static constexpr Z z{};
-};
-
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::X,
- Coordinates<hip_impl::GroupSize>::X) noexcept {
- return hc_get_grid_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::X,
- Coordinates<hip_impl::NumGroups>::X) noexcept {
- return hc_get_grid_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Y,
- Coordinates<hip_impl::GroupSize>::Y) noexcept {
- return hc_get_grid_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Y,
- Coordinates<hip_impl::NumGroups>::Y) noexcept {
- return hc_get_grid_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Z,
- Coordinates<hip_impl::GroupSize>::Z) noexcept {
- return hc_get_grid_size(2);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Z,
- Coordinates<hip_impl::NumGroups>::Z) noexcept {
- return hc_get_grid_size(2);
-}
-
-static constexpr Coordinates<hip_impl::GroupSize> blockDim{};
-static constexpr Coordinates<hip_impl::GroupId> blockIdx{};
-static constexpr Coordinates<hip_impl::NumGroups> gridDim{};
-static constexpr Coordinates<hip_impl::WorkitemId> threadIdx{};
-
-#define hipThreadIdx_x (hc_get_workitem_id(0))
-#define hipThreadIdx_y (hc_get_workitem_id(1))
-#define hipThreadIdx_z (hc_get_workitem_id(2))
-
-#define hipBlockIdx_x (hc_get_group_id(0))
-#define hipBlockIdx_y (hc_get_group_id(1))
-#define hipBlockIdx_z (hc_get_group_id(2))
-
-#define hipBlockDim_x (hc_get_group_size(0))
-#define hipBlockDim_y (hc_get_group_size(1))
-#define hipBlockDim_z (hc_get_group_size(2))
-
-#define hipGridDim_x (hc_get_num_groups(0))
-#define hipGridDim_y (hc_get_num_groups(1))
-#define hipGridDim_z (hc_get_num_groups(2))
-
-#endif // defined __HCC__
-
-#ifndef __OPENMP_AMDGCN__
-#if __HCC_OR_HIP_CLANG__
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#if __HIP_ENABLE_DEVICE_MALLOC__
-extern "C" __device__ void* __hip_malloc(size_t);
-extern "C" __device__ void* __hip_free(void* ptr);
-static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); }
-static inline __device__ void* free(void* ptr) { return __hip_free(ptr); }
-#else
-static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nullptr; }
-static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; }
-#endif
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#endif //__HCC_OR_HIP_CLANG__
-#endif // !__OPENMP_AMDGCN__
-
-#ifdef __HCC__
-
-#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
-
-#define HIP_KERNEL_NAME(...) (__VA_ARGS__)
-#define HIP_SYMBOL(X) #X
-
-#if defined __HCC_CPP__
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block,
- grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block,
- grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block,
- grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block,
- grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, grid_launch_parm& lp, bool unlockPostponed = 0);
-
-#if GENERIC_GRID_LAUNCH == 0
-//#warning "Original hipLaunchKernel defined"
-// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be
-// either size_t or dim3 types
-#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
- do { \
- grid_launch_parm lp; \
- lp.dynamic_group_mem_bytes = _groupMemBytes; \
- hipStream_t trueStream = \
- (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \
- _kernelName(lp, ##__VA_ARGS__); \
- ihipPostLaunchKernel(#_kernelName, trueStream, lp); \
- } while (0)
-#endif // GENERIC_GRID_LAUNCH
-
-#elif defined(__HCC_C__)
-
-// TODO - develop C interface.
-
-#endif //__HCC_CPP__
-
-// End doxygen API:
-/**
- * @}
- */
-
-//
-// hip-clang functions
-//
-#elif defined(__clang__) && defined(__HIP__)
-
-#define HIP_KERNEL_NAME(...) __VA_ARGS__
-#define HIP_SYMBOL(X) X
-
-typedef int hipLaunchParm;
-
-template <std::size_t n, typename... Ts,
- typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
-void pArgs(const std::tuple<Ts...>&, void*) {}
-
-template <std::size_t n, typename... Ts,
- typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
-void pArgs(const std::tuple<Ts...>& formals, void** _vargs) {
- using T = typename std::tuple_element<n, std::tuple<Ts...> >::type;
-
- static_assert(!std::is_reference<T>{},
- "A __global__ function cannot have a reference as one of its "
- "arguments.");
-#if defined(HIP_STRICT)
- static_assert(std::is_trivially_copyable<T>{},
- "Only TriviallyCopyable types can be arguments to a __global__ "
- "function");
-#endif
- _vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals)));
- return pArgs<n + 1>(formals, _vargs);
-}
-
-template <typename... Formals, typename... Actuals>
-std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), std::tuple<Actuals...>(actuals)) {
- static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch");
- std::tuple<Formals...> to_formals{std::move(actuals)};
- return to_formals;
-}
-
-#if defined(HIP_TEMPLATE_KERNEL_LAUNCH)
-template <typename... Args, typename F = void (*)(Args...)>
-void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
- std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) {
- constexpr size_t count = sizeof...(Args);
- auto tup_ = std::tuple<Args...>{args...};
- auto tup = validateArgsCountType(kernel, tup_);
- void* _Args[count];
- pArgs<0>(tup, _Args);
-
- auto k = reinterpret_cast<void*>(kernel);
- hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream);
-}
-#else
-#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
- do { \
- kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__); \
- } while (0)
-
-#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
-#endif
-
-#include <hip/hip_runtime_api.h>
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
-struct __HIP_BlockIdx {
- __device__
- std::uint32_t operator()(std::uint32_t x) const noexcept { return __ockl_get_group_id(x); }
-};
-struct __HIP_BlockDim {
- __device__
- std::uint32_t operator()(std::uint32_t x) const noexcept {
- return __ockl_get_local_size(x);
- }
-};
-struct __HIP_GridDim {
- __device__
- std::uint32_t operator()(std::uint32_t x) const noexcept {
- return __ockl_get_num_groups(x);
- }
-};
-struct __HIP_ThreadIdx {
- __device__
- std::uint32_t operator()(std::uint32_t x) const noexcept {
- return __ockl_get_local_id(x);
- }
-};
-
-template <typename F>
-struct __HIP_Coordinates {
- using R = decltype(F{}(0));
-
- struct X { __device__ operator R() const noexcept { return F{}(0); } };
- struct Y { __device__ operator R() const noexcept { return F{}(1); } };
- struct Z { __device__ operator R() const noexcept { return F{}(2); } };
-
- static constexpr X x{};
- static constexpr Y y{};
- static constexpr Z z{};
-#ifdef __cplusplus
- __device__ operator dim3() const { return dim3(x, y, z); }
-#endif
-
-};
-template <typename F>
-#if !defined(_MSC_VER)
-__attribute__((weak))
-#endif
-constexpr typename __HIP_Coordinates<F>::X __HIP_Coordinates<F>::x;
-template <typename F>
-#if !defined(_MSC_VER)
-__attribute__((weak))
-#endif
-constexpr typename __HIP_Coordinates<F>::Y __HIP_Coordinates<F>::y;
-template <typename F>
-#if !defined(_MSC_VER)
-__attribute__((weak))
-#endif
-constexpr typename __HIP_Coordinates<F>::Z __HIP_Coordinates<F>::z;
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(uint);
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::X,
- __HIP_Coordinates<__HIP_BlockDim>::X) noexcept {
- return __ockl_get_global_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::X,
- __HIP_Coordinates<__HIP_GridDim>::X) noexcept {
- return __ockl_get_global_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::Y,
- __HIP_Coordinates<__HIP_BlockDim>::Y) noexcept {
- return __ockl_get_global_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::Y,
- __HIP_Coordinates<__HIP_GridDim>::Y) noexcept {
- return __ockl_get_global_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::Z,
- __HIP_Coordinates<__HIP_BlockDim>::Z) noexcept {
- return __ockl_get_global_size(2);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::Z,
- __HIP_Coordinates<__HIP_GridDim>::Z) noexcept {
- return __ockl_get_global_size(2);
-}
-
-static constexpr __HIP_Coordinates<__HIP_BlockDim> blockDim{};
-static constexpr __HIP_Coordinates<__HIP_BlockIdx> blockIdx{};
-static constexpr __HIP_Coordinates<__HIP_GridDim> gridDim{};
-static constexpr __HIP_Coordinates<__HIP_ThreadIdx> threadIdx{};
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
-#define hipThreadIdx_x (__ockl_get_local_id(0))
-#define hipThreadIdx_y (__ockl_get_local_id(1))
-#define hipThreadIdx_z (__ockl_get_local_id(2))
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
-#define hipBlockIdx_x (__ockl_get_group_id(0))
-#define hipBlockIdx_y (__ockl_get_group_id(1))
-#define hipBlockIdx_z (__ockl_get_group_id(2))
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
-#define hipBlockDim_x (__ockl_get_local_size(0))
-#define hipBlockDim_y (__ockl_get_local_size(1))
-#define hipBlockDim_z (__ockl_get_local_size(2))
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
-#define hipGridDim_x (__ockl_get_num_groups(0))
-#define hipGridDim_y (__ockl_get_num_groups(1))
-#define hipGridDim_z (__ockl_get_num_groups(2))
-
-#include <hip/hcc_detail/math_functions.h>
-
-#if __HIP_HCC_COMPAT_MODE__
-// Define HCC work item functions in terms of HIP builtin variables.
-#pragma push_macro("__DEFINE_HCC_FUNC")
-#define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
-inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \
- if (i==0) \
- return hip_var.x; \
- else if(i==1) \
- return hip_var.y; \
- else \
- return hip_var.z; \
-}
-
-__DEFINE_HCC_FUNC(workitem_id, threadIdx)
-__DEFINE_HCC_FUNC(group_id, blockIdx)
-__DEFINE_HCC_FUNC(group_size, blockDim)
-__DEFINE_HCC_FUNC(num_groups, gridDim)
-#pragma pop_macro("__DEFINE_HCC_FUNC")
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint);
-inline __device__ __attribute__((always_inline)) uint
-hc_get_workitem_absolute_id(int dim)
-{
- return (uint)__ockl_get_global_id(dim);
-}
-
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-// Support std::complex.
-#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
-#pragma push_macro("__CUDA__")
-#define __CUDA__
-#include <__clang_cuda_math_forward_declares.h>
-#include <__clang_cuda_complex_builtins.h>
-// Workaround for using libc++ with HIP-Clang.
-// The following headers requires clang include path before standard C++ include path.
-// However libc++ include path requires to be before clang include path.
-// To workaround this, we pass -isystem with the parent directory of clang include
-// path instead of the clang include path itself.
-#include <include/cuda_wrappers/algorithm>
-#include <include/cuda_wrappers/complex>
-#include <include/cuda_wrappers/new>
-#undef __CUDA__
-#pragma pop_macro("__CUDA__")
-#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#endif // defined(__clang__) && defined(__HIP__)
-
-#include <hip/hcc_detail/hip_memory.h>
-
-#endif // HIP_HCC_DETAIL_RUNTIME_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_runtime_api.h b/third_party/rocm/include/hip/hcc_detail/hip_runtime_api.h
deleted file mode 100644
index 1980004..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_runtime_api.h
+++ /dev/null
@@ -1,4358 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-//#pragma once
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H
-/**
- * @file hcc_detail/hip_runtime_api.h
- * @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or
- * special language extensions (-hc mode) ; those functions in hip_runtime.h.
- */
-#include <stdint.h>
-#include <stddef.h>
-
-#ifndef GENERIC_GRID_LAUNCH
-#define GENERIC_GRID_LAUNCH 1
-#endif
-
-#ifndef __HIP_ROCclr__
-#define __HIP_ROCclr__ 0
-#endif
-
-#include <hip/hcc_detail/host_defines.h>
-#include <hip/hcc_detail/driver_types.h>
-#include <hip/hcc_detail/hip_texture_types.h>
-#include <hip/hcc_detail/hip_surface_types.h>
-
-#if !__HIP_ROCclr__ && defined(__cplusplus)
-#include <hsa/hsa.h>
-#include <hip/hcc_detail/program_state.hpp>
-#endif
-
-#if defined(_MSC_VER)
-#define DEPRECATED(msg) __declspec(deprecated(msg))
-#else // !defined(_MSC_VER)
-#define DEPRECATED(msg) __attribute__ ((deprecated(msg)))
-#endif // !defined(_MSC_VER)
-
-#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases. For more details please refer https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_deprecated_api_list.md"
-
-#if defined(__HCC__) && (__hcc_workweek__ < 16155)
-#error("This version of HIP requires a newer version of HCC.");
-#endif
-
-#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
-#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
-#define HIP_LAUNCH_PARAM_END ((void*)0x03)
-
-#ifdef __cplusplus
- #define __dparm(x) \
- = x
-#else
- #define __dparm(x)
-#endif
-
-#ifdef __GNUC__
-#pragma GCC visibility push (default)
-#endif
-
-#ifdef __cplusplus
-
-namespace hip_impl {
-hipError_t hip_init();
-} // namespace hip_impl
-#endif
-
-// Structure definitions:
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//---
-// API-visible structures
-typedef struct ihipCtx_t* hipCtx_t;
-
-// Note many APIs also use integer deviceIds as an alternative to the device pointer:
-typedef int hipDevice_t;
-
-typedef enum hipDeviceP2PAttr {
- hipDevP2PAttrPerformanceRank = 0,
- hipDevP2PAttrAccessSupported,
- hipDevP2PAttrNativeAtomicSupported,
- hipDevP2PAttrHipArrayAccessSupported
-} hipDeviceP2PAttr;
-
-typedef struct ihipStream_t* hipStream_t;
-
-#define hipIpcMemLazyEnablePeerAccess 0
-
-#define HIP_IPC_HANDLE_SIZE 64
-
-typedef struct hipIpcMemHandle_st {
- char reserved[HIP_IPC_HANDLE_SIZE];
-} hipIpcMemHandle_t;
-
-#if __HIP_ROCclr__
-// TODO: IPC event handle currently unsupported
-struct ihipIpcEventHandle_t;
-typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t;
-#else
-typedef struct hipIpcEventHandle_st {
- char reserved[HIP_IPC_HANDLE_SIZE];
-} hipIpcEventHandle_t;
-#endif
-typedef struct ihipModule_t* hipModule_t;
-
-typedef struct ihipModuleSymbol_t* hipFunction_t;
-
-typedef struct hipFuncAttributes {
- int binaryVersion;
- int cacheModeCA;
- size_t constSizeBytes;
- size_t localSizeBytes;
- int maxDynamicSharedSizeBytes;
- int maxThreadsPerBlock;
- int numRegs;
- int preferredShmemCarveout;
- int ptxVersion;
- size_t sharedSizeBytes;
-} hipFuncAttributes;
-
-typedef struct ihipEvent_t* hipEvent_t;
-
-enum hipLimit_t {
- hipLimitMallocHeapSize = 0x02,
-};
-
-/**
- * @addtogroup GlobalDefs More
- * @{
- */
-//! Flags that can be used with hipStreamCreateWithFlags
-#define hipStreamDefault \
- 0x00 ///< Default stream creation flags. These are used with hipStreamCreate().
-#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream
-
-
-//! Flags that can be used with hipEventCreateWithFlags:
-#define hipEventDefault 0x0 ///< Default flags
-#define hipEventBlockingSync \
- 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
-#define hipEventDisableTiming \
- 0x2 ///< Disable event's capability to record timing information. May improve performance.
-#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP.
-#define hipEventReleaseToDevice \
- 0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to
- /// obtain more precise timings of commands between events. The flag is a no-op on
- /// CUDA platforms.
-#define hipEventReleaseToSystem \
- 0x80000000 /// < Use a system-scope release that when recording this event. This flag is
- /// useful to make non-coherent host memory visible to the host. The flag is a
- /// no-op on CUDA platforms.
-
-
-//! Flags that can be used with hipHostMalloc
-#define hipHostMallocDefault 0x0
-#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts.
-#define hipHostMallocMapped \
- 0x2 ///< Map the allocation into the address space for the current device. The device pointer
- ///< can be obtained with #hipHostGetDevicePointer.
-#define hipHostMallocWriteCombined 0x4
-#define hipHostMallocNumaUser \
- 0x20000000 ///< Host memory allocation will follow numa policy set by user
-
-#define hipHostMallocCoherent \
- 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific
- ///< allocation.
-#define hipHostMallocNonCoherent \
- 0x80000000 ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific
- ///< allocation.
-
-#define hipMemAttachGlobal 0x01 ///< Memory can be accessed by any stream on any device
-#define hipMemAttachHost 0x02 ///< Memory cannot be accessed by any stream on any device
-#define hipMemAttachSingle 0x04 ///< Memory can only be accessed by a single stream on
- ///< the associated device
-
-#define hipDeviceMallocDefault 0x0
-#define hipDeviceMallocFinegrained 0x1 ///< Memory is allocated in fine grained region of device.
-
-//! Flags that can be used with hipHostRegister
-#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable
-#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts.
-#define hipHostRegisterMapped \
- 0x2 ///< Map the allocation into the address space for the current device. The device pointer
- ///< can be obtained with #hipHostGetDevicePointer.
-#define hipHostRegisterIoMemory 0x4 ///< Not supported.
-#define hipExtHostRegisterCoarseGrained 0x8 ///< Coarse Grained host memory lock
-
-#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield
-#define hipDeviceScheduleSpin \
- 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and
- ///< may consume more power.
-#define hipDeviceScheduleYield \
- 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers
- ///< power and is friendlier to other threads in the system.
-#define hipDeviceScheduleBlockingSync 0x4
-#define hipDeviceScheduleMask 0x7
-
-#define hipDeviceMapHost 0x8
-#define hipDeviceLmemResizeToMax 0x16
-
-#define hipArrayDefault 0x00 ///< Default HIP array allocation flag
-#define hipArrayLayered 0x01
-#define hipArraySurfaceLoadStore 0x02
-#define hipArrayCubemap 0x04
-#define hipArrayTextureGather 0x08
-
-#define hipOccupancyDefault 0x00
-
-#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01
-#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02
-
-#define hipCpuDeviceId ((int)-1)
-#define hipInvalidDeviceId ((int)-2)
-
-// Flags that can be used with hipExtLaunch Set of APIs
-#define hipExtAnyOrderLaunch 0x01 ///< AnyOrderLaunch of kernels
-
-/*
- * @brief HIP Memory Advise values
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipMemoryAdvise {
- hipMemAdviseSetReadMostly = 1, ///< Data will mostly be read and only occassionally
- ///< be written to
- hipMemAdviseUnsetReadMostly = 2, ///< Undo the effect of hipMemAdviseSetReadMostly
- hipMemAdviseSetPreferredLocation = 3, ///< Set the preferred location for the data as
- ///< the specified device
- hipMemAdviseUnsetPreferredLocation = 4, ///< Clear the preferred location for the data
- hipMemAdviseSetAccessedBy = 5, ///< Data will be accessed by the specified device,
- ///< so prevent page faults as much as possible
- hipMemAdviseUnsetAccessedBy = 6 ///< Let the Unified Memory subsystem decide on
- ///< the page faulting policy for the specified device
-} hipMemoryAdvise;
-
-/*
- * @brief HIP range attributes
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipMemRangeAttribute {
- hipMemRangeAttributeReadMostly = 1, ///< Whether the range will mostly be read and
- ///< only occassionally be written to
- hipMemRangeAttributePreferredLocation = 2, ///< The preferred location of the range
- hipMemRangeAttributeAccessedBy = 3, ///< Memory range has cudaMemAdviseSetAccessedBy
- ///< set for specified device
- hipMemRangeAttributeLastPrefetchLocation = 4,///< The last location to which the range was prefetched
-} hipMemRangeAttribute;
-
-/*
- * @brief hipJitOption
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipJitOption {
- hipJitOptionMaxRegisters = 0,
- hipJitOptionThreadsPerBlock,
- hipJitOptionWallTime,
- hipJitOptionInfoLogBuffer,
- hipJitOptionInfoLogBufferSizeBytes,
- hipJitOptionErrorLogBuffer,
- hipJitOptionErrorLogBufferSizeBytes,
- hipJitOptionOptimizationLevel,
- hipJitOptionTargetFromContext,
- hipJitOptionTarget,
- hipJitOptionFallbackStrategy,
- hipJitOptionGenerateDebugInfo,
- hipJitOptionLogVerbose,
- hipJitOptionGenerateLineInfo,
- hipJitOptionCacheMode,
- hipJitOptionSm3xOpt,
- hipJitOptionFastCompile,
- hipJitOptionNumOptions
-} hipJitOption;
-
-/**
- * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
- */
-typedef enum hipFuncAttribute {
- hipFuncAttributeMaxDynamicSharedMemorySize = 8,
- hipFuncAttributePreferredSharedMemoryCarveout = 9,
- hipFuncAttributeMax
-} hipFuncAttribute;
-
-/**
- * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
- */
-typedef enum hipFuncCache_t {
- hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default)
- hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache
- hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory
- hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory
-} hipFuncCache_t;
-
-/**
- * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
- */
-typedef enum hipSharedMemConfig {
- hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking.
- hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-bytes intervals and performs best
- ///< when adjacent threads access data 4 bytes apart.
- hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best
- ///< when adjacent threads access data 4 bytes apart.
-} hipSharedMemConfig;
-
-/**
- * Struct for data in 3D
- *
- */
-typedef struct dim3 {
- uint32_t x; ///< x
- uint32_t y; ///< y
- uint32_t z; ///< z
-#ifdef __cplusplus
- __host__ __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
-#endif
-} dim3;
-
-typedef struct hipLaunchParams_t {
- void* func; ///< Device function symbol
- dim3 gridDim; ///< Grid dimentions
- dim3 blockDim; ///< Block dimentions
- void **args; ///< Arguments
- size_t sharedMem; ///< Shared memory
- hipStream_t stream; ///< Stream identifier
-} hipLaunchParams;
-
-#if __HIP_HAS_GET_PCH
-/**
- * Internal use only. This API may change in the future
- * Pre-Compiled header for online compilation
- *
- */
- void __hipGetPCH(const char** pch, unsigned int*size);
-#endif
-
-
-// Doxygen end group GlobalDefs
-/** @} */
-
-
-//-------------------------------------------------------------------------------------------------
-
-
-// The handle allows the async commands to use the stream even if the parent hipStream_t goes
-// out-of-scope.
-// typedef class ihipStream_t * hipStream_t;
-
-
-/*
- * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the
- * surrounding hipEvent_t goes out-of-scope. This is handy for cases where the hipEvent_t goes
- * out-of-scope but the true event is being written by some async queue or device */
-// typedef struct hipEvent_t {
-// struct ihipEvent_t *_handle;
-//} hipEvent_t;
-
-
-/**
- * @defgroup API HIP API
- * @{
- *
- * Defines the HIP API. See the individual sections for more information.
- */
-
-
-/**
- * @defgroup Driver Initialization and Version
- * @{
- * This section describes the initializtion and version functions of HIP runtime API.
- *
- */
-
-/**
- * @brief Explicitly initializes the HIP runtime.
- *
- * Most HIP APIs implicitly initialize the HIP runtime.
- * This API provides control over the timing of the initialization.
- */
-// TODO-ctx - more description on error codes.
-hipError_t hipInit(unsigned int flags);
-
-/**
- * @brief Returns the approximate HIP driver version.
- *
- * @param [out] driverVersion
- *
- * @returns #hipSuccess, #hipErrorInavlidValue
- *
- * @warning The HIP feature set does not correspond to an exact CUDA SDK driver revision.
- * This function always set *driverVersion to 4 as an approximation though HIP supports
- * some features which were introduced in later CUDA SDK revisions.
- * HIP apps code should not rely on the driver revision number here and should
- * use arch feature flags to test device capabilities or conditional compilation.
- *
- * @see hipRuntimeGetVersion
- */
-hipError_t hipDriverGetVersion(int* driverVersion);
-
-/**
- * @brief Returns the approximate HIP Runtime version.
- *
- * @param [out] runtimeVersion
- *
- * @returns #hipSuccess, #hipErrorInavlidValue
- *
- * @warning On HIP/HCC path this function returns HIP runtime patch version however on
- * HIP/NVCC path this function return CUDA runtime version.
- *
- * @see hipDriverGetVersion
- */
-hipError_t hipRuntimeGetVersion(int* runtimeVersion);
-
-
-/**
- * @brief Returns a handle to a compute device
- * @param [out] device
- * @param [in] ordinal
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGet(hipDevice_t* device, int ordinal);
-
-/**
- * @brief Returns the compute capability of the device
- * @param [out] major
- * @param [out] minor
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device);
-
-/**
- * @brief Returns an identifer string for the device.
- * @param [out] name
- * @param [in] len
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device);
-
-
-/**
- * @brief Returns a value for attr of link between two devices
- * @param [out] value
- * @param [in] attr
- * @param [in] srcDevice
- * @param [in] dstDevice
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
- int srcDevice, int dstDevice);
-
-/**
- * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID.
- * @param [out] pciBusId
- * @param [in] len
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device);
-
-
-/**
- * @brief Returns a handle to a compute device.
- * @param [out] device handle
- * @param [in] PCI Bus ID
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice, #hipErrorInvalidValue
- */
-hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId);
-
-
-/**
- * @brief Returns the total amount of memory on the device.
- * @param [out] bytes
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device);
-
-
-// doxygen end initialization
-/**
- * @}
- */
-
-/**
- * @defgroup Device Device Management
- * @{
- * This section describes the device management functions of HIP runtime API.
- */
-
-/**
- * @brief Waits on all active streams on current device
- *
- * When this command is invoked, the host thread gets blocked until all the commands associated
- * with streams associated with the device. HIP does not support multiple blocking modes (yet!).
- *
- * @returns #hipSuccess
- *
- * @see hipSetDevice, hipDeviceReset
- */
-hipError_t hipDeviceSynchronize(void);
-
-
-/**
- * @brief The state of current device is discarded and updated to a fresh state.
- *
- * Calling this function deletes all streams created, memory allocated, kernels running, events
- * created. Make sure that no other thread is using the device or streams, memory, kernels, events
- * associated with the current device.
- *
- * @returns #hipSuccess
- *
- * @see hipDeviceSynchronize
- */
-hipError_t hipDeviceReset(void);
-
-
-/**
- * @brief Set default device to be used for subsequent hip API calls from this thread.
- *
- * @param[in] deviceId Valid device in range 0...hipGetDeviceCount().
- *
- * Sets @p device as the default device for the calling host thread. Valid device id's are 0...
- * (hipGetDeviceCount()-1).
- *
- * Many HIP APIs implicitly use the "default device" :
- *
- * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be
- * allocated on device.
- * - Any streams or events created from this host thread will be associated with device.
- * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device
- * (unless a specific stream is specified, in which case the device associated with that stream will
- * be used).
- *
- * This function may be called from any host thread. Multiple host threads may use the same device.
- * This function does no synchronization with the previous or new device, and has very little
- * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before
- * making a HIP runtime call which uses the default device.
- *
- * The default device is stored in thread-local-storage for each thread.
- * Thread-pool implementations may inherit the default device of the previous thread. A good
- * practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known
- * standard device.
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorDeviceAlreadyInUse
- *
- * @see hipGetDevice, hipGetDeviceCount
- */
-hipError_t hipSetDevice(int deviceId);
-
-
-/**
- * @brief Return the default device id for the calling host thread.
- *
- * @param [out] device *device is written with the default device
- *
- * HIP maintains an default device for each thread using thread-local-storage.
- * This device is used implicitly for HIP runtime APIs called by this thread.
- * hipGetDevice returns in * @p device the default device for the calling host thread.
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- *
- * @see hipSetDevice, hipGetDevicesizeBytes
- */
-hipError_t hipGetDevice(int* deviceId);
-
-
-/**
- * @brief Return number of compute-capable devices.
- *
- * @param [output] count Returns number of compute-capable devices.
- *
- * @returns #hipSuccess, #hipErrorNoDevice
- *
- *
- * Returns in @p *count the number of devices that have ability to run compute commands. If there
- * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. If 1 or more
- * devices can be found, then hipGetDeviceCount returns #hipSuccess.
- */
-hipError_t hipGetDeviceCount(int* count);
-
-/**
- * @brief Query for a specific device attribute.
- *
- * @param [out] pi pointer to value to return
- * @param [in] attr attribute to query
- * @param [in] deviceId which device to query for information
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- */
-hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId);
-
-/**
- * @brief Returns device properties.
- *
- * @param [out] prop written with device properties
- * @param [in] deviceId which device to query for information
- *
- * @return #hipSuccess, #hipErrorInvalidDevice
- * @bug HCC always returns 0 for maxThreadsPerMultiProcessor
- * @bug HCC always returns 0 for regsPerBlock
- * @bug HCC always returns 0 for l2CacheSize
- *
- * Populates hipGetDeviceProperties with information for the specified device.
- */
-hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
-
-
-/**
- * @brief Set L1/Shared cache partition.
- *
- * @param [in] cacheConfig
- *
- * @returns #hipSuccess, #hipErrorNotInitialized
- * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored
- * on those architectures.
- *
- */
-hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig);
-
-
-/**
- * @brief Set Cache configuration for a specific function
- *
- * @param [in] cacheConfig
- *
- * @returns #hipSuccess, #hipErrorNotInitialized
- * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored
- * on those architectures.
- *
- */
-hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig);
-
-/**
- * @brief Get Resource limits of current device
- *
- * @param [out] pValue
- * @param [in] limit
- *
- * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue
- * Note: Currently, only hipLimitMallocHeapSize is available
- *
- */
-hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit);
-
-
-/**
- * @brief Returns bank width of shared memory for current device
- *
- * @param [out] pConfig
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig);
-
-/**
- * @brief Gets the flags set for current device
- *
- * @param [out] flags
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- */
-hipError_t hipGetDeviceFlags(unsigned int* flags);
-
-/**
- * @brief The bank width of shared memory on current device is set
- *
- * @param [in] config
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config);
-
-/**
- * @brief The current device behavior is changed according the flags passed.
- *
- * @param [in] flags
- *
- * The schedule flags impact how HIP waits for the completion of a command running on a device.
- * hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted the
- * work until the command completes. This offers the lowest latency, but will consume a CPU core
- * and may increase power. hipDeviceScheduleYield : The HIP runtime will yield the CPU to
- * system so that other tasks can use it. This may increase latency to detect the completion but
- * will consume less power and is friendlier to other tasks in the system.
- * hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield.
- * hipDeviceScheduleAuto : Use a hueristic to select between Spin and Yield modes. If the
- * number of HIP contexts is greater than the number of logical processors in the system, use Spin
- * scheduling. Else use Yield scheduling.
- *
- *
- * hipDeviceMapHost : Allow mapping host memory. On ROCM, this is always allowed and
- * the flag is ignored. hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag.
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess
- *
- *
- */
-hipError_t hipSetDeviceFlags(unsigned flags);
-
-/**
- * @brief Device which matches hipDeviceProp_t is returned
- *
- * @param [out] device ID
- * @param [in] device properties pointer
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop);
-
-/**
- * @brief Returns the link type and hop count between two devices
- *
- * @param [in] device1 Ordinal for device1
- * @param [in] device2 Ordinal for device2
- * @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two devices
- * @param [out] hopcount Returns the hop count between the two devices
- *
- * Queries and returns the HSA link type and the hop count between the two specified devices.
- *
- * @returns #hipSuccess, #hipInvalidDevice, #hipErrorRuntimeOther
- */
-hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount);
-
-
-// TODO: implement IPC apis
-
-/**
- * @brief Gets an interprocess memory handle for an existing device memory
- * allocation
- *
- * Takes a pointer to the base of an existing device memory allocation created
- * with hipMalloc and exports it for use in another process. This is a
- * lightweight operation and may be called multiple times on an allocation
- * without adverse effects.
- *
- * If a region of memory is freed with hipFree and a subsequent call
- * to hipMalloc returns memory with the same device address,
- * hipIpcGetMemHandle will return a unique handle for the
- * new memory.
- *
- * @param handle - Pointer to user allocated hipIpcMemHandle to return
- * the handle in.
- * @param devPtr - Base pointer to previously allocated device memory
- *
- * @returns
- * hipSuccess,
- * hipErrorInvalidHandle,
- * hipErrorOutOfMemory,
- * hipErrorMapFailed,
- *
- */
-hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr);
-
-/**
- * @brief Opens an interprocess memory handle exported from another process
- * and returns a device pointer usable in the local process.
- *
- * Maps memory exported from another process with hipIpcGetMemHandle into
- * the current device address space. For contexts on different devices
- * hipIpcOpenMemHandle can attempt to enable peer access between the
- * devices as if the user called hipDeviceEnablePeerAccess. This behavior is
- * controlled by the hipIpcMemLazyEnablePeerAccess flag.
- * hipDeviceCanAccessPeer can determine if a mapping is possible.
- *
- * Contexts that may open hipIpcMemHandles are restricted in the following way.
- * hipIpcMemHandles from each device in a given process may only be opened
- * by one context per device per other process.
- *
- * Memory returned from hipIpcOpenMemHandle must be freed with
- * hipIpcCloseMemHandle.
- *
- * Calling hipFree on an exported memory region before calling
- * hipIpcCloseMemHandle in the importing context will result in undefined
- * behavior.
- *
- * @param devPtr - Returned device pointer
- * @param handle - hipIpcMemHandle to open
- * @param flags - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess
- *
- * @returns
- * hipSuccess,
- * hipErrorMapFailed,
- * hipErrorInvalidHandle,
- * hipErrorTooManyPeers
- *
- * @note No guarantees are made about the address returned in @p *devPtr.
- * In particular, multiple processes may not receive the same address for the same @p handle.
- *
- */
-hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags);
-
-/**
- * @brief Close memory mapped with hipIpcOpenMemHandle
- *
- * Unmaps memory returnd by hipIpcOpenMemHandle. The original allocation
- * in the exporting process as well as imported mappings in other processes
- * will be unaffected.
- *
- * Any resources used to enable peer access will be freed if this is the
- * last mapping using them.
- *
- * @param devPtr - Device pointer returned by hipIpcOpenMemHandle
- *
- * @returns
- * hipSuccess,
- * hipErrorMapFailed,
- * hipErrorInvalidHandle,
- *
- */
-hipError_t hipIpcCloseMemHandle(void* devPtr);
-
-
-hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event);
-hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle);
-
-// end doxygen Device
-/**
- * @}
- */
-
-/**
- *
- * @defgroup Execution Execution Control
- * @{
- * This section describes the execution control functions of HIP runtime API.
- *
- */
-/**
- * @brief Set attribute for a specific function
- *
- * @param [in] func;
- * @param [in] attr;
- * @param [in] value;
- *
- * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value);
-
-/**
- * @brief Set Cache configuration for a specific function
- *
- * @param [in] config;
- *
- * @returns #hipSuccess, #hipErrorNotInitialized
- * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored
- * on those architectures.
- *
- */
-hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config);
-
-/**
- * @brief Set shared memory configuation for a specific function
- *
- * @param [in] func
- * @param [in] config
- *
- * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config);
-
-//doxygen end execution
-/**
- * @}
- */
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup Error Error Handling
- * @{
- * This section describes the error handling functions of HIP runtime API.
- */
-
-/**
- * @brief Return last error returned by any HIP runtime API call and resets the stored error code to
- * #hipSuccess
- *
- * @returns return code from last HIP called from the active host thread
- *
- * Returns the last error that has been returned by any of the runtime calls in the same host
- * thread, and then resets the saved error to #hipSuccess.
- *
- * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t
- */
-hipError_t hipGetLastError(void);
-
-
-/**
- * @brief Return last error returned by any HIP runtime API call.
- *
- * @return #hipSuccess
- *
- * Returns the last error that has been returned by any of the runtime calls in the same host
- * thread. Unlike hipGetLastError, this function does not reset the saved error code.
- *
- * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t
- */
-hipError_t hipPeekAtLastError(void);
-
-
-/**
- * @brief Return name of the specified error code in text form.
- *
- * @param hip_error Error code to convert to name.
- * @return const char pointer to the NULL-terminated error name
- *
- * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t
- */
-const char* hipGetErrorName(hipError_t hip_error);
-
-
-/**
- * @brief Return handy text string message to explain the error which occurred
- *
- * @param hipError Error code to convert to string.
- * @return const char pointer to the NULL-terminated error string
- *
- * @warning : on HCC, this function returns the name of the error (same as hipGetErrorName)
- *
- * @see hipGetErrorName, hipGetLastError, hipPeakAtLastError, hipError_t
- */
-const char* hipGetErrorString(hipError_t hipError);
-
-// end doxygen Error
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup Stream Stream Management
- * @{
- * This section describes the stream management functions of HIP runtime API.
- * The following Stream APIs are not (yet) supported in HIP:
- * - cudaStreamAttachMemAsync
- */
-
-
-/**
- * @brief Create an asynchronous stream.
- *
- * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the
- * newly created stream.
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream. @p stream returns an opaque handle that can be used to
- * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on
- * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory
- * used by the stream, applicaiton must call hipStreamDestroy.
- *
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-hipError_t hipStreamCreate(hipStream_t* stream);
-
-
-/**
- * @brief Create an asynchronous stream.
- *
- * @param[in, out] stream Pointer to new stream
- * @param[in ] flags to control stream creation.
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream. @p stream returns an opaque handle that can be used to
- * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on
- * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory
- * used by the stream, applicaiton must call hipStreamDestroy. Flags controls behavior of the
- * stream. See #hipStreamDefault, #hipStreamNonBlocking.
- *
- *
- * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-
-hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags);
-
-
-/**
- * @brief Create an asynchronous stream with the specified priority.
- *
- * @param[in, out] stream Pointer to new stream
- * @param[in ] flags to control stream creation.
- * @param[in ] priority of the stream. Lower numbers represent higher priorities.
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream with the specified priority. @p stream returns an opaque handle
- * that can be used to reference the newly created stream in subsequent hipStream* commands. The
- * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope.
- * To release the memory used by the stream, applicaiton must call hipStreamDestroy. Flags controls
- * behavior of the stream. See #hipStreamDefault, #hipStreamNonBlocking.
- *
- *
- * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-
-hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority);
-
-
-/**
- * @brief Returns numerical values that correspond to the least and greatest stream priority.
- *
- * @param[in, out] leastPriority pointer in which value corresponding to least priority is returned.
- * @param[in, out] greatestPriority pointer in which value corresponding to greatest priority is returned.
- *
- * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the least
- * and greatest stream priority respectively. Stream priorities follow a convention where lower numbers
- * imply greater priorities. The range of meaningful stream priorities is given by
- * [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value
- * that is outside the the meaningful range as specified by this API, the priority is automatically
- * clamped to within the valid range.
- */
-
-hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
-
-
-/**
- * @brief Destroys the specified stream.
- *
- * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the
- * newly created stream.
- * @return #hipSuccess #hipErrorInvalidHandle
- *
- * Destroys the specified stream.
- *
- * If commands are still executing on the specified stream, some may complete execution before the
- * queue is deleted.
- *
- * The queue may be destroyed while some commands are still inflight, or may wait for all commands
- * queued to the stream before destroying it.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, hipStreamWaitEvent,
- * hipStreamSynchronize
- */
-hipError_t hipStreamDestroy(hipStream_t stream);
-
-
-/**
- * @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or
- * #hipErrorNotReady if not.
- *
- * @param[in] stream stream to query
- *
- * @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle
- *
- * This is thread-safe and returns a snapshot of the current state of the queue. However, if other
- * host threads are sending work to the stream, the status may change immediately after the function
- * is called. It is typically used for debug.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamSynchronize,
- * hipStreamDestroy
- */
-hipError_t hipStreamQuery(hipStream_t stream);
-
-
-/**
- * @brief Wait for all commands in stream to complete.
- *
- * @param[in] stream stream identifier.
- *
- * @return #hipSuccess, #hipErrorInvalidHandle
- *
- * This command is host-synchronous : the host will block until the specified stream is empty.
- *
- * This command follows standard null-stream semantics. Specifically, specifying the null stream
- * will cause the command to wait for other streams on the same device to complete all pending
- * operations.
- *
- * This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active
- * or blocking.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamDestroy
- *
- */
-hipError_t hipStreamSynchronize(hipStream_t stream);
-
-
-/**
- * @brief Make the specified compute stream wait for an event
- *
- * @param[in] stream stream to make wait.
- * @param[in] event event to wait on
- * @param[in] flags control operation [must be 0]
- *
- * @return #hipSuccess, #hipErrorInvalidHandle
- *
- * This function inserts a wait operation into the specified stream.
- * All future work submitted to @p stream will wait until @p event reports completion before
- * beginning execution.
- *
- * This function only waits for commands in the current stream to complete. Notably,, this function
- * does not impliciy wait for commands in the default stream to complete, even if the specified
- * stream is created with hipStreamNonBlocking = 0.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamDestroy
- */
-hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
-
-
-/**
- * @brief Return flags associated with this stream.
- *
- * @param[in] stream stream to be queried
- * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle
- *
- * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidHandle
- *
- * Return flags associated with this stream in *@p flags.
- *
- * @see hipStreamCreateWithFlags
- */
-hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags);
-
-
-/**
- * @brief Query the priority of a stream.
- *
- * @param[in] stream stream to be queried
- * @param[in,out] priority Pointer to an unsigned integer in which the stream's priority is returned
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle
- *
- * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidHandle
- *
- * Query the priority of a stream. The priority is returned in in priority.
- *
- * @see hipStreamCreateWithFlags
- */
-hipError_t hipStreamGetPriority(hipStream_t stream, int* priority);
-
-
-/**
- * @brief Create an asynchronous stream with the specified CU mask.
- *
- * @param[in, out] stream Pointer to new stream
- * @param[in ] cuMaskSize Size of CU mask bit array passed in.
- * @param[in ] cuMask Bit-vector representing the CU mask. Each active bit represents using one CU.
- * The first 32 bits represent the first 32 CUs, and so on. If its size is greater than physical
- * CU number (i.e., multiProcessorCount member of hipDeviceProp_t), the extra elements are ignored.
- * It is user's responsibility to make sure the input is meaningful.
- * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream with the specified CU mask. @p stream returns an opaque handle
- * that can be used to reference the newly created stream in subsequent hipStream* commands. The
- * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope.
- * To release the memory used by the stream, application must call hipStreamDestroy.
- *
- *
- * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, const uint32_t* cuMask);
-
-
-/**
- * @brief Get CU mask associated with an asynchronous stream
- *
- * @param[in] stream stream to be queried
- * @param[in] cuMaskSize number of the block of memories (uint32_t *) allocated by user
- * @param[out] cuMask Pointer to a pre-allocated block of memories (uint32_t *) in which
- * the stream's CU mask is returned. The CU mask is returned in a chunck of 32 bits where
- * each active bit represents one active CU
- * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue
- *
- * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask);
-
-/**
- * Stream CallBack struct
- */
-typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
-
-/**
- * @brief Adds a callback to be called on the host after all currently enqueued
- * items in the stream have completed. For each
- * cudaStreamAddCallback call, a callback will be executed exactly once.
- * The callback will block later work in the stream until it is finished.
- * @param[in] stream - Stream to add callback to
- * @param[in] callback - The function to call once preceding stream operations are complete
- * @param[in] userData - User specified data to be passed to the callback function
- * @param[in] flags - Reserved for future use, must be 0
- * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorNotSupported
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize,
- * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority
- *
- */
-hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
- unsigned int flags);
-
-
-// end doxygen Stream
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup Event Event Management
- * @{
- * This section describes the event management functions of HIP runtime API.
- */
-
-/**
- * @brief Create an event with the specified flags
- *
- * @param[in,out] event Returns the newly created event.
- * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault,
- #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess
-
- * #hipEventDefault : Default flag. The event will use active synchronization and will support
- timing. Active synchronization provides lowest possible latency at the expense of dedicating a
- CPU to poll on the event.
- * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is
- called on this event, the thread will block until the event completes. This can increase latency
- for the synchronization but can result in lower power and more resources for other CPU threads.
- * #hipEventDisableTiming : Disable recording of timing information.
-
- * @warning On AMD platform, hipEventInterprocess support is under development. Use of this flag
- will return an error.
- *
- * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue,
- #hipErrorLaunchFailure, #hipErrorOutOfMemory
- *
- * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime
- */
-hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags);
-
-
-/**
- * Create an event
- *
- * @param[in,out] event Returns the newly created event.
- *
- * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue,
- * #hipErrorLaunchFailure, #hipErrorOutOfMemory
- *
- * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize,
- * hipEventDestroy, hipEventElapsedTime
- */
-hipError_t hipEventCreate(hipEvent_t* event);
-
-
-/**
- * @brief Record an event in the specified stream.
- *
- * @param[in] event event to record.
- * @param[in] stream stream in which to record event.
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized,
- * #hipErrorInvalidHandle, #hipErrorLaunchFailure
- *
- * hipEventQuery() or hipEventSynchronize() must be used to determine when the event
- * transitions from "recording" (after hipEventRecord() is called) to "recorded"
- * (when timestamps are set, if requested).
- *
- * Events which are recorded in a non-NULL stream will transition
- * from the "recording" to the "recorded" state when they reach the head of
- * the specified stream, after all previous
- * commands in that stream have completed executing.
- *
- * If hipEventRecord() has been previously called on this event, then this call will overwrite any
- * existing state in event.
- *
- * If this function is called on an event that is currently being recorded, results are undefined
- * - either outstanding recording may save state into the event, and the order is not guaranteed.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize,
- * hipEventDestroy, hipEventElapsedTime
- *
- */
-#ifdef __cplusplus
-hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
-#else
-hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream);
-#endif
-
-/**
- * @brief Destroy the specified event.
- *
- * @param[in] event Event to destroy.
- * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue,
- * #hipErrorLaunchFailure
- *
- * Releases memory associated with the event. If the event is recording but has not completed
- * recording when hipEventDestroy() is called, the function will return immediately and the
- * completion_future resources will be released later, when the hipDevice is synchronized.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord,
- * hipEventElapsedTime
- *
- * @returns #hipSuccess
- */
-hipError_t hipEventDestroy(hipEvent_t event);
-
-
-/**
- * @brief Wait for an event to complete.
- *
- * This function will block until the event is ready, waiting for all previous work in the stream
- * specified when event was recorded with hipEventRecord().
- *
- * If hipEventRecord() has not been called on @p event, this function returns immediately.
- *
- * TODO-hip- This function needs to support hipEventBlockingSync parameter.
- *
- * @param[in] event Event on which to wait.
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized,
- * #hipErrorInvalidHandle, #hipErrorLaunchFailure
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord,
- * hipEventElapsedTime
- */
-hipError_t hipEventSynchronize(hipEvent_t event);
-
-
-/**
- * @brief Return the elapsed time between two events.
- *
- * @param[out] ms : Return time between start and stop in ms.
- * @param[in] start : Start event.
- * @param[in] stop : Stop event.
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidHandle,
- * #hipErrorNotInitialized, #hipErrorLaunchFailure
- *
- * Computes the elapsed time between two events. Time is computed in ms, with
- * a resolution of approximately 1 us.
- *
- * Events which are recorded in a NULL stream will block until all commands
- * on all other streams complete execution, and then record the timestamp.
- *
- * Events which are recorded in a non-NULL stream will record their timestamp
- * when they reach the head of the specified stream, after all previous
- * commands in that stream have completed executing. Thus the time that
- * the event recorded may be significantly after the host calls hipEventRecord().
- *
- * If hipEventRecord() has not been called on either event, then #hipErrorInvalidHandle is
- * returned. If hipEventRecord() has been called on both events, but the timestamp has not yet been
- * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at
- * least one of the events), then #hipErrorNotReady is returned.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord,
- * hipEventSynchronize
- */
-hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop);
-
-
-/**
- * @brief Query event status
- *
- * @param[in] event Event to query.
- * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue,
- * #hipErrorNotInitialized, #hipErrorLaunchFailure
- *
- * Query the status of the specified event. This function will return #hipSuccess if all
- * commands in the appropriate stream (specified to hipEventRecord()) have completed, or if
- * hipEventRecord() was not called on the event. If that work has not completed, then
- * #hipErrorNotReady is returned.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy,
- * hipEventSynchronize, hipEventElapsedTime
- */
-hipError_t hipEventQuery(hipEvent_t event);
-
-
-// end doxygen Events
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup Memory Memory Management
- * @{
- * This section describes the memory management functions of HIP runtime API.
- * The following CUDA APIs are not currently supported:
- * - cudaMalloc3D
- * - cudaMalloc3DArray
- * - TODO - more 2D, 3D, array APIs here.
- *
- *
- */
-
-/**
- * @brief Return attributes for the specified pointer
- *
- * @param[out] attributes attributes for the specified pointer
- * @param[in] ptr pointer to get attributes for
- *
- * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- *
- * @see hipGetDeviceCount, hipGetDevice, hipSetDevice, hipChooseDevice
- */
-hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr);
-
-/**
- * @brief Allocate memory on the default accelerator
- *
- * @param[out] ptr Pointer to the allocated memory
- * @param[in] size Requested memory size
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- * @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr)
- *
- * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray,
- * hipHostFree, hipHostMalloc
- */
-hipError_t hipMalloc(void** ptr, size_t size);
-
-/**
- * @brief Allocate memory on the default accelerator
- *
- * @param[out] ptr Pointer to the allocated memory
- * @param[in] sizeBytes Requested memory size
- * @param[in] flags Type of memory allocation
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- * @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr)
- *
- * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray,
- * hipHostFree, hipHostMalloc
- */
-hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags);
-
-/**
- * @brief Allocate pinned host memory [Deprecated]
- *
- * @param[out] ptr Pointer to the allocated host pinned memory
- * @param[in] size Requested memory size
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- * @return #hipSuccess, #hipErrorOutOfMemory
- *
- * @deprecated use hipHostMalloc() instead
- */
-DEPRECATED("use hipHostMalloc instead")
-hipError_t hipMallocHost(void** ptr, size_t size);
-
-/**
- * @brief Allocate pinned host memory [Deprecated]
- *
- * @param[out] ptr Pointer to the allocated host pinned memory
- * @param[in] size Requested memory size
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- * @return #hipSuccess, #hipErrorOutOfMemory
- *
- * @deprecated use hipHostMalloc() instead
- */
-DEPRECATED("use hipHostMalloc instead")
-hipError_t hipMemAllocHost(void** ptr, size_t size);
-
-/**
- * @brief Allocate device accessible page locked host memory
- *
- * @param[out] ptr Pointer to the allocated host pinned memory
- * @param[in] size Requested memory size
- * @param[in] flags Type of host memory allocation
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- * @return #hipSuccess, #hipErrorOutOfMemory
- *
- * @see hipSetDeviceFlags, hipHostFree
- */
-hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags);
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @addtogroup MemoryM Managed Memory (ROCm HMM)
- * @{
- * @ingroup Memory
- * This section describes the managed memory management functions of HIP runtime API.
- *
- */
-
-/**
- * @brief Allocates memory that will be automatically managed by AMD HMM.
- *
- * @param [out] dev_ptr - pointer to allocated device memory
- * @param [in] size - requested allocation size in bytes
- * @param [in] flags - must be either hipMemAttachGlobal or hipMemAttachHost
- * (defaults to hipMemAttachGlobal)
- *
- * @returns #hipSuccess, #hipErrorMemoryAllocation, #hipErrorNotSupported, #hipErrorInvalidValue
- */
-hipError_t hipMallocManaged(void** dev_ptr,
- size_t size,
- unsigned int flags __dparm(hipMemAttachGlobal));
-
-/**
- * @brief Prefetches memory to the specified destination device using AMD HMM.
- *
- * @param [in] dev_ptr pointer to be prefetched
- * @param [in] count size in bytes for prefetching
- * @param [in] device destination device to prefetch to
- * @param [in] stream stream to enqueue prefetch operation
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemPrefetchAsync(const void* dev_ptr,
- size_t count,
- int device,
- hipStream_t stream __dparm(0));
-
-/**
- * @brief Advise about the usage of a given memory range to AMD HMM.
- *
- * @param [in] dev_ptr pointer to memory to set the advice for
- * @param [in] count size in bytes of the memory range
- * @param [in] advice advice to be applied for the specified memory range
- * @param [in] device device to apply the advice for
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemAdvise(const void* dev_ptr,
- size_t count,
- hipMemoryAdvise advice,
- int device);
-
-/**
- * @brief Query an attribute of a given memory range in AMD HMM.
- *
- * @param [in,out] data a pointer to a memory location where the result of each
- * attribute query will be written to
- * @param [in] data_size the size of data
- * @param [in] attribute the attribute to query
- * @param [in] dev_ptr start of the range to query
- * @param [in] count size of the range to query
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemRangeGetAttribute(void* data,
- size_t data_size,
- hipMemRangeAttribute attribute,
- const void* dev_ptr,
- size_t count);
-
-/**
- * @brief Query attributes of a given memory range in AMD HMM.
- *
- * @param [in,out] data a two-dimensional array containing pointers to memory locations
- * where the result of each attribute query will be written to
- * @param [in] data_sizes an array, containing the sizes of each result
- * @param [in] attributes an array of attributes to query (num_attributes and the number
- * of attributes in this array should match)
- * @param [in] num_attributes the number of attributes to query
- * @param [in] dev_ptr start of the range to query
- * @param [in] count size of the range to query
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemRangeGetAttributes(void** data,
- size_t* data_sizes,
- hipMemRangeAttribute* attributes,
- size_t num_attributes,
- const void* dev_ptr,
- size_t count);
-
-/**
- * @brief Attach memory to a stream asynchronously in AMD HMM.
- *
- * @param [in] stream - stream in which to enqueue the attach operation
- * @param [in] dev_ptr - pointer to memory (must be a pointer to managed memory or
- * to a valid host-accessible region of system-allocated memory)
- * @param [in] length - length of memory (defaults to zero)
- * @param [in] flags - must be one of hipMemAttachGlobal, hipMemAttachHost or
- * hipMemAttachSingle (defaults to hipMemAttachSingle)
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipStreamAttachMemAsync(hipStream_t stream,
- hipDeviceptr_t* dev_ptr,
- size_t length __dparm(0),
- unsigned int flags __dparm(hipMemAttachSingle));
-
-// end doxygen Managed Memory
-/**
- * @}
- */
-
-/**
- * @brief Allocate device accessible page locked host memory [Deprecated]
- *
- * @param[out] ptr Pointer to the allocated host pinned memory
- * @param[in] size Requested memory size
- * @param[in] flags Type of host memory allocation
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- * @return #hipSuccess, #hipErrorOutOfMemory
- *
- * @deprecated use hipHostMalloc() instead
- */
-DEPRECATED("use hipHostMalloc instead")
-hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags);
-
-/**
- * @brief Get Device pointer from Host Pointer allocated through hipHostMalloc
- *
- * @param[out] devPtr Device Pointer mapped to passed host pointer
- * @param[in] hstPtr Host Pointer allocated through hipHostMalloc
- * @param[in] flags Flags to be passed for extension
- *
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory
- *
- * @see hipSetDeviceFlags, hipHostMalloc
- */
-hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags);
-
-/**
- * @brief Return flags associated with host pointer
- *
- * @param[out] flagsPtr Memory location to store flags
- * @param[in] hostPtr Host Pointer allocated through hipHostMalloc
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * @see hipHostMalloc
- */
-hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr);
-
-/**
- * @brief Register host memory so it can be accessed from the current device.
- *
- * @param[in] hostPtr Pointer to host memory to be registered.
- * @param[in] sizeBytes size of the host memory
- * @param[in] flags See below.
- *
- * Flags:
- * - #hipHostRegisterDefault Memory is Mapped and Portable
- * - #hipHostRegisterPortable Memory is considered registered by all contexts. HIP only supports
- * one context so this is always assumed true.
- * - #hipHostRegisterMapped Map the allocation into the address space for the current device.
- * The device pointer can be obtained with #hipHostGetDevicePointer.
- *
- *
- * After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer.
- * On many systems, the mapped device pointer will have a different value than the mapped host
- * pointer. Applications must use the device pointer in device code, and the host pointer in host
- * code.
- *
- * On some systems, registered memory is pinned. On some systems, registered memory may not
- * actually be pinned but uses OS or hardware facilities to allow GPU access to the host memory.
- *
- * Developers are strongly encouraged to register memory blocks which are aligned to the host
- * cache-line size (typically 64 bytes, but it can be obtained from the CPUID instruction).
- *
- * If registering non-aligned pointers, the application must take care when registering pointers from
- * the same cache line on different devices. HIP's coarse-grained synchronization model does not
- * guarantee correct results if different devices write to different parts of the same cache block -
- * typically one of the writes will "win" and overwrite data from the other registered memory
- * region.
- *
- * @return #hipSuccess, #hipErrorOutOfMemory
- *
- * @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer
- */
-hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags);
-
-/**
- * @brief Un-register host pointer
- *
- * @param[in] hostPtr Host pointer previously registered with #hipHostRegister
- * @return Error code
- *
- * @see hipHostRegister
- */
-hipError_t hipHostUnregister(void* hostPtr);
-
-/**
- * Allocates at least width (in bytes) * height bytes of linear memory
- * Padding may occur to ensure alignment requirements are met for the given row
- * The change in width size due to padding will be returned in *pitch.
- * Currently the alignment is set to 128 bytes
- *
- * @param[out] ptr Pointer to the allocated device memory
- * @param[out] pitch Pitch for allocation (in bytes)
- * @param[in] width Requested pitched allocation width (in bytes)
- * @param[in] height Requested pitched allocation height
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- * @return Error code
- *
- * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-
-hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height);
-
-/**
- * Allocates at least width (in bytes) * height bytes of linear memory
- * Padding may occur to ensure alignment requirements are met for the given row
- * The change in width size due to padding will be returned in *pitch.
- * Currently the alignment is set to 128 bytes
- *
- * @param[out] dptr Pointer to the allocated device memory
- * @param[out] pitch Pitch for allocation (in bytes)
- * @param[in] width Requested pitched allocation width (in bytes)
- * @param[in] height Requested pitched allocation height
- *
- * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- * The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array.
- * Given the row and column of an array element of type T, the address is computed as:
- * T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
- *
- * @return Error code
- *
- * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-
-hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes);
-
-/**
- * @brief Free memory allocated by the hcc hip memory allocation API.
- * This API performs an implicit hipDeviceSynchronize() call.
- * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned.
- *
- * @param[in] ptr Pointer to memory to be freed
- * @return #hipSuccess
- * @return #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated
- * with hipHostMalloc)
- *
- * @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-hipError_t hipFree(void* ptr);
-
-/**
- * @brief Free memory allocated by the hcc hip host memory allocation API. [Deprecated]
- *
- * @param[in] ptr Pointer to memory to be freed
- * @return #hipSuccess,
- * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with
- hipMalloc)
-
- * @deprecated use hipHostFree() instead
- */
-DEPRECATED("use hipHostFree instead")
-hipError_t hipFreeHost(void* ptr);
-
-/**
- * @brief Free memory allocated by the hcc hip host memory allocation API
- * This API performs an implicit hipDeviceSynchronize() call.
- * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned.
- *
- * @param[in] ptr Pointer to memory to be freed
- * @return #hipSuccess,
- * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with
- * hipMalloc)
- *
- * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-hipError_t hipHostFree(void* ptr);
-
-/**
- * @brief Copy data from src to dst.
- *
- * It supports memory from host to device,
- * device to host, device to device and host to host
- * The src and dst must not overlap.
- *
- * For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice).
- * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the
- * device where the src data is physically located. For optimal peer-to-peer copies, the copy device
- * must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy
- * agent as the current device and src/dest as the peerDevice argument). If this is not done, the
- * hipMemcpy will still work, but will perform the copy using a staging buffer on the host.
- * Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in
- * undefined behavior.
- *
- * @param[out] dst Data being copy to
- * @param[in] src Data being copy from
- * @param[in] sizeBytes Data size in bytes
- * @param[in] kind Memory copy type
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- *
- * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
-
-// TODO: Add description
-hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes,
- hipMemcpyKind kind, hipStream_t stream);
-/**
- * @brief Copy data from Host to Device
- *
- * @param[out] dst Data being copy to
- * @param[in] src Data being copy from
- * @param[in] sizeBytes Data size in bytes
- *
- * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes);
-
-/**
- * @brief Copy data from Device to Host
- *
- * @param[out] dst Data being copy to
- * @param[in] src Data being copy from
- * @param[in] sizeBytes Data size in bytes
- *
- * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes);
-
-/**
- * @brief Copy data from Device to Device
- *
- * @param[out] dst Data being copy to
- * @param[in] src Data being copy from
- * @param[in] sizeBytes Data size in bytes
- *
- * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes);
-
-/**
- * @brief Copy data from Host to Device asynchronously
- *
- * @param[out] dst Data being copy to
- * @param[in] src Data being copy from
- * @param[in] sizeBytes Data size in bytes
- *
- * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream);
-
-/**
- * @brief Copy data from Device to Host asynchronously
- *
- * @param[out] dst Data being copy to
- * @param[in] src Data being copy from
- * @param[in] sizeBytes Data size in bytes
- *
- * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream);
-
-/**
- * @brief Copy data from Device to Device asynchronously
- *
- * @param[out] dst Data being copy to
- * @param[in] src Data being copy from
- * @param[in] sizeBytes Data size in bytes
- *
- * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes,
- hipStream_t stream);
-
-#if __HIP_ROCclr__
-hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
- hipModule_t hmod, const char* name);
-
-hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol);
-hipError_t hipGetSymbolSize(size_t* size, const void* symbol);
-hipError_t hipMemcpyToSymbol(const void* symbol, const void* src,
- size_t sizeBytes, size_t offset __dparm(0),
- hipMemcpyKind kind __dparm(hipMemcpyHostToDevice));
-hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
- size_t sizeBytes, size_t offset,
- hipMemcpyKind kind, hipStream_t stream __dparm(0));
-hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol,
- size_t sizeBytes, size_t offset __dparm(0),
- hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost));
-hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol,
- size_t sizeBytes, size_t offset,
- hipMemcpyKind kind,
- hipStream_t stream __dparm(0));
-#else
-hipError_t hipModuleGetGlobal(void**, size_t*, hipModule_t, const char*);
-
-#ifdef __cplusplus //Start : Not supported in gcc
-namespace hip_impl {
-inline
-__attribute__((visibility("hidden")))
-hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
- const char* name);
-} // Namespace hip_impl.
-
-
-/**
- * @brief Copies the memory address of symbol @p symbolName to @p devPtr
- *
- * @param[in] symbolName - Symbol on device; passed to the lookup helper as a `const char*`
- * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol
- * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound (the helper's result is returned as-is)
- *
- * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
- //HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName);
- hip_impl::hip_init();  // called before the symbol lookup
- size_t size = 0;  // size output required by the helper's signature; discarded here
- return hip_impl::read_agent_global_from_process(devPtr, &size, (const char*)symbolName);
-}
-
-
-/**
- * @brief Copies the size of symbol @p symbolName to @p size
- *
- * @param[in] symbolName - Symbol on device; passed to the lookup helper as a `const char*`
- * @param[out] size - Pointer to the size of the symbol
- * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound (the helper's result is returned as-is)
- *
- * @see hipGetSymbolAddress, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
- // HIP_INIT_API(hipGetSymbolSize, size, symbolName);
- hip_impl::hip_init();  // called before the symbol lookup
- void* devPtr = nullptr;  // address output required by the helper's signature; discarded here
- return hip_impl::read_agent_global_from_process(&devPtr, size, (const char*)symbolName);
-}
-#endif // End : Not supported in gcc
-
-#if defined(__cplusplus)
-} // extern "C"
-#endif
-
-#ifdef __cplusplus
-namespace hip_impl {
-// Implementation entry point that the inline hipMemcpyToSymbol wrapper in this header
-// forwards to. The wrapper passes, in order: the resolved device address of the symbol,
-// src, sizeBytes, offset, kind, and the symbol name.
-hipError_t hipMemcpyToSymbol(void*, const void*, size_t, size_t, hipMemcpyKind,
- const char*);
-} // Namespace hip_impl.
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-/**
- * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
- * located @p offset bytes from the start of symbol @p symbolName.
- *
- * The memory areas may not overlap. Symbol can either be a variable that resides in global or
- * constant memory space, or it can be a character string, naming a variable that resides in global
- * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice.
- * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection are not supported, use
- * hipErrorUnknown for now.
- *
- * @param[in] symbolName - Symbol destination on device
- * @param[in] src - Data being copied from
- * @param[in] sizeBytes - Data size in bytes
- * @param[in] offset - Offset from start of symbol in bytes
- * @param[in] kind - Type of transfer
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- *
- * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol,
- * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
- * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-#ifdef __cplusplus
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src,
-                             size_t sizeBytes, size_t offset __dparm(0),
-                             hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) {
-    // Guard: a null symbol is rejected before any lookup is attempted.
-    if (symbolName == nullptr) {
-        return hipErrorInvalidSymbol;
-    }
-
-    const char* const name = static_cast<const char*>(symbolName);
-
-    // Resolve the symbol to a device address. The status returned by the
-    // lookup is not inspected; whatever address it stored (initially null)
-    // is handed to the implementation together with the symbol name.
-    hipDeviceptr_t devAddr = nullptr;
-    hipGetSymbolAddress(&devAddr, name);
-
-    return hip_impl::hipMemcpyToSymbol(devAddr, src, sizeBytes, offset, kind, name);
-}
-#endif
-
-#if defined(__cplusplus)
-} // extern "C"
-#endif
-
-#ifdef __cplusplus
-namespace hip_impl {
-// Implementation entry points that the inline symbol-copy wrappers in this header forward to.
-// In each case the wrapper first resolves the symbol to a device address and passes the
-// symbol name as the trailing const char* argument.
-
-// Called by the hipMemcpyToSymbolAsync wrapper with: resolved device address, src,
-// sizeBytes, offset, kind, stream, symbol name.
-hipError_t hipMemcpyToSymbolAsync(void*, const void*, size_t, size_t,
- hipMemcpyKind, hipStream_t, const char*);
-// Called by the hipMemcpyFromSymbol wrapper with: dst, resolved device address,
-// sizeBytes, offset, kind, symbol name.
-hipError_t hipMemcpyFromSymbol(void*, const void*, size_t, size_t,
- hipMemcpyKind, const char*);
-// Called by the hipMemcpyFromSymbolAsync wrapper with: dst, resolved device address,
-// sizeBytes, offset, kind, stream, symbol name.
-hipError_t hipMemcpyFromSymbolAsync(void*, const void*, size_t, size_t,
- hipMemcpyKind, hipStream_t, const char*);
-} // Namespace hip_impl.
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-/**
- * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
- * located @p offset bytes from the start of symbol @p symbolName.
- *
- * The memory areas may not overlap. Symbol can either be a variable that resides in global or
- * constant memory space, or it can be a character string, naming a variable that resides in global
- * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice.
- * hipMemcpyToSymbolAsync() is asynchronous with respect to the host, so the call may return before
- * the copy is complete.
- * TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection are not supported, use
- * hipErrorUnknown for now.
- *
- * @param[in] symbolName - Symbol destination on device
- * @param[in] src - Data being copied from
- * @param[in] sizeBytes - Data size in bytes
- * @param[in] offset - Offset from start of symbol in bytes
- * @param[in] kind - Type of transfer
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- *
- * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol,
- * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
- * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-
-#ifdef __cplusplus //Start : Not supported in gcc
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src,
-                                  size_t sizeBytes, size_t offset,
-                                  hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
-    // Guard: a null symbol is rejected before any lookup is attempted.
-    if (symbolName == nullptr) {
-        return hipErrorInvalidSymbol;
-    }
-
-    // Resolve the symbol to a device address. The status returned by the
-    // lookup is not inspected; whatever address it stored (initially null)
-    // is handed to the implementation together with the symbol name.
-    hipDeviceptr_t devAddr = nullptr;
-    hipGetSymbolAddress(&devAddr, symbolName);
-
-    const char* const name = static_cast<const char*>(symbolName);
-    return hip_impl::hipMemcpyToSymbolAsync(devAddr, src, sizeBytes, offset, kind, stream, name);
-}
-
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName,
-                               size_t sizeBytes, size_t offset __dparm(0),
-                               hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
-    // Guard: a null symbol is rejected before any lookup is attempted.
-    if (symbolName == nullptr) {
-        return hipErrorInvalidSymbol;
-    }
-
-    // Resolve the symbol to the device address that serves as the copy
-    // source. The status returned by the lookup is not inspected; whatever
-    // address it stored (initially null) is handed to the implementation.
-    hipDeviceptr_t devAddr = nullptr;
-    hipGetSymbolAddress(&devAddr, symbolName);
-
-    const char* const name = static_cast<const char*>(symbolName);
-    return hip_impl::hipMemcpyFromSymbol(dst, devAddr, sizeBytes, offset, kind, name);
-}
-
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
-                                    size_t sizeBytes, size_t offset,
-                                    hipMemcpyKind kind,
-                                    hipStream_t stream __dparm(0)) {
-    // Guard: a null symbol is rejected before any lookup is attempted.
-    if (symbolName == nullptr) {
-        return hipErrorInvalidSymbol;
-    }
-
-    // Resolve the symbol to the device address that serves as the copy
-    // source. The status returned by the lookup is not inspected; whatever
-    // address it stored (initially null) is handed to the implementation.
-    hipDeviceptr_t devAddr = nullptr;
-    hipGetSymbolAddress(&devAddr, symbolName);
-
-    const char* const name = static_cast<const char*>(symbolName);
-    return hip_impl::hipMemcpyFromSymbolAsync(dst, devAddr, sizeBytes, offset, kind, stream, name);
-}
-#endif // End : Not supported in gcc
-
-#endif // __HIP_ROCclr__
-/**
- * @brief Copy data from src to dst asynchronously.
- *
- * @warning If host or dest are not pinned, the memory copy will be performed synchronously. For
- * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously.
- *
- * @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
- * For hipMemcpy, the copy is always performed by the device associated with the specified stream.
- *
- * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is
- * attached to the device where the src data is physically located. For optimal peer-to-peer copies,
- * the copy device must be able to access the src and dst pointers (by calling
- * hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice
- * argument). If this is not done, the hipMemcpy will still work, but will perform the copy using a
- * staging buffer on the host.
- *
- * @param[out] dst Data being copied to
- * @param[in] src Data being copied from
- * @param[in] sizeBytes Data size in bytes
- * @param[in] kind Type of transfer
- * @param[in] stream Stream on which the copy is enqueued
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- *
- * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol,
- * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
- * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
- hipStream_t stream __dparm(0));
-
-/**
- * @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant
- * byte value value.
- *
- * @param[out] dst Data being filled
- * @param[in] value Constant value to be set
- * @param[in] sizeBytes Data size in bytes
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemset(void* dst, int value, size_t sizeBytes);
-
-/**
- * @brief Fills the first count bytes of the memory area pointed to by dest with the constant
- * byte value value.
- *
- * @param[out] dest Data ptr to be filled
- * @param[in] value Constant value to be set
- * @param[in] count Number of values to be set
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count);
-
-/**
- * @brief Fills the first count bytes of the memory area pointed to by dest with the constant
- * byte value value.
- *
- * hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- * @param[out] dest Data ptr to be filled
- * @param[in] value Constant value to be set
- * @param[in] count Number of values to be set
- * @param[in] stream - Stream identifier
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0));
-
-/**
- * @brief Fills count short values of the memory area pointed to by dest with the constant
- * short value value.
- *
- * @param[out] dest Data ptr to be filled
- * @param[in] value Constant value to be set
- * @param[in] count Number of values to be set
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count);
-
-/**
- * @brief Fills count short values of the memory area pointed to by dest with the constant
- * short value value.
- *
- * hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- * @param[out] dest Data ptr to be filled
- * @param[in] value Constant value to be set
- * @param[in] count Number of values to be set
- * @param[in] stream - Stream identifier
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0));
-
-/**
- * @brief Fills the memory area pointed to by dest with the constant integer
- * value for specified number of times.
- *
- * @param[out] dest Data being filled
- * @param[in] value Constant value to be set
- * @param[in] count Number of values to be set
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count);
-
-/**
- * @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant
- * byte value value.
- *
- * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- * @param[out] dst Pointer to device memory
- * @param[in] value - Value to set for each byte of specified memory
- * @param[in] sizeBytes - Size in bytes to set
- * @param[in] stream - Stream identifier
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0));
-
-/**
- * @brief Fills the memory area pointed to by dst with the constant integer
- * value for specified number of times.
- *
- * hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- * @param[out] dst Pointer to device memory
- * @param[in] value - Integer value to be set
- * @param[in] count - number of values to be set
- * @param[in] stream - Stream identifier
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count,
- hipStream_t stream __dparm(0));
-
-/**
- * @brief Fills the memory area pointed to by dst with the constant value.
- *
- * @param[out] dst Pointer to device memory
- * @param[in] pitch - data size in bytes
- * @param[in] value - constant value to be set
- * @param[in] width
- * @param[in] height
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-
-hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height);
-
-/**
- * @brief Fills asynchronously the memory area pointed to by dst with the constant value.
- *
- * @param[in] dst Pointer to device memory
- * @param[in] pitch - data size in bytes
- * @param[in] value - constant value to be set
- * @param[in] width
- * @param[in] height
- * @param[in] stream
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-
-hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height,hipStream_t stream __dparm(0));
-
-/**
- * @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
- *
- * @param[in] pitchedDevPtr
- * @param[in] value - constant value to be set
- * @param[in] extent
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent );
-
-/**
- * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
- *
- * @param[in] pitchedDevPtr
- * @param[in] value - constant value to be set
- * @param[in] extent
- * @param[in] stream
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream __dparm(0));
-
-/**
- * @brief Query memory info.
- * Return snapshot of free memory, and total allocatable memory on the device.
- *
- * Returns in *free a snapshot of the current free memory.
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- * @warning On HCC, the free memory only accounts for memory allocated by this process and may be
- * optimistic.
- **/
-hipError_t hipMemGetInfo(size_t* free, size_t* total);
-
-
-/**
- * NOTE(review): this function is undocumented in this header. From the signature it presumably
- * reports, through @p size, the size of the allocation that @p ptr refers to - confirm against
- * the HIP runtime documentation before relying on this.
- */
-hipError_t hipMemPtrGetInfo(void* ptr, size_t* size);
-
-
-/**
- * @brief Allocate an array on the device.
- *
- * @param[out] array Pointer to allocated array in device memory
- * @param[in] desc Requested channel format
- * @param[in] width Requested array allocation width
- * @param[in] height Requested array allocation height
- * @param[in] flags Requested properties of allocated array
- * @return #hipSuccess, #hipErrorOutOfMemory
- *
- * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree
- */
-hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width,
- size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault));
-// NOTE(review): undocumented in this header. Presumably creates an array described by
-// pAllocateArray and returns its handle through pHandle - confirm.
-hipError_t hipArrayCreate(hipArray** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray);
-
-// NOTE(review): undocumented in this header. Presumably the 3D-descriptor counterpart of
-// hipArrayCreate, returning the new array through array - confirm.
-hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray);
-
-// NOTE(review): undocumented in this header. Presumably allocates pitched device memory of
-// the given extent and returns it through pitchedDevPtr - confirm.
-hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent);
-
-/**
- * @brief Frees an array on the device.
- *
- * @param[in] array Pointer to array to free
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- *
- * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree
- */
-hipError_t hipFreeArray(hipArray* array);
-
-/**
- * @brief Frees a mipmapped array on the device
- *
- * @param[in] mipmappedArray - Pointer to mipmapped array to free
- *
- * @return #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray);
-
-/**
- * @brief Allocate an array on the device.
- *
- * @param[out] array Pointer to allocated array in device memory
- * @param[in] desc Requested channel format
- * @param[in] extent Requested array allocation width, height and depth
- * @param[in] flags Requested properties of allocated array
- * @return #hipSuccess, #hipErrorOutOfMemory
- *
- * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree
- */
-
-hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc,
- struct hipExtent extent, unsigned int flags);
-
-/**
- * @brief Allocate a mipmapped array on the device
- *
- * @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory
- * @param[in] desc - Requested channel format
- * @param[in] extent - Requested allocation size (width field in elements)
- * @param[in] numLevels - Number of mipmap levels to allocate
- * @param[in] flags - Flags for extensions
- *
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation
- */
-hipError_t hipMallocMipmappedArray(
- hipMipmappedArray_t *mipmappedArray,
- const struct hipChannelFormatDesc* desc,
- struct hipExtent extent,
- unsigned int numLevels,
- unsigned int flags __dparm(0));
-
-/**
- * @brief Gets a mipmap level of a HIP mipmapped array
- *
- * @param[out] levelArray - Returned mipmap level HIP array
- * @param[in] mipmappedArray - HIP mipmapped array
- * @param[in] level - Mipmap level
- *
- * @return #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipGetMipmappedArrayLevel(
- hipArray_t *levelArray,
- hipMipmappedArray_const_t mipmappedArray,
- unsigned int level);
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dst Destination memory address
- * @param[in] dpitch Pitch of destination memory
- * @param[in] src Source memory address
- * @param[in] spitch Pitch of source memory
- * @param[in] width Width of matrix transfer (columns in bytes)
- * @param[in] height Height of matrix transfer (rows)
- * @param[in] kind Type of transfer
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
- size_t height, hipMemcpyKind kind);
-
-/**
- * @brief Copies memory for 2D arrays.
- * @param[in] pCopy Parameters for the memory copy
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpyToSymbol, hipMemcpyAsync
-*/
-hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy);
-
-/**
- * @brief Copies memory for 2D arrays.
- * @param[in] pCopy Parameters for the memory copy
- * @param[in] stream Stream to use
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpyToSymbol, hipMemcpyAsync
-*/
-hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0));
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dst Destination memory address
- * @param[in] dpitch Pitch of destination memory
- * @param[in] src Source memory address
- * @param[in] spitch Pitch of source memory
- * @param[in] width Width of matrix transfer (columns in bytes)
- * @param[in] height Height of matrix transfer (rows)
- * @param[in] kind Type of transfer
- * @param[in] stream Stream to use
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
- size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0));
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dst Destination memory address
- * @param[in] wOffset Destination starting X offset
- * @param[in] hOffset Destination starting Y offset
- * @param[in] src Source memory address
- * @param[in] spitch Pitch of source memory
- * @param[in] width Width of matrix transfer (columns in bytes)
- * @param[in] height Height of matrix transfer (rows)
- * @param[in] kind Type of transfer
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
- size_t spitch, size_t width, size_t height, hipMemcpyKind kind);
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dst Destination memory address
- * @param[in] wOffset Destination starting X offset
- * @param[in] hOffset Destination starting Y offset
- * @param[in] src Source memory address
- * @param[in] count size in bytes to copy
- * @param[in] kind Type of transfer
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
- size_t count, hipMemcpyKind kind);
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dst Destination memory address
- * @param[in] srcArray Source memory address
- * @param[in] wOffset Source starting X offset
- * @param[in] hOffset Source starting Y offset
- * @param[in] count Size in bytes to copy
- * @param[in] kind Type of transfer
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset,
- size_t count, hipMemcpyKind kind);
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dst Destination memory address
- * @param[in] dpitch Pitch of destination memory
- * @param[in] src Source memory address
- * @param[in] wOffset Source starting X offset
- * @param[in] hOffset Source starting Y offset
- * @param[in] width Width of matrix transfer (columns in bytes)
- * @param[in] height Height of matrix transfer (rows)
- * @param[in] kind Type of transfer
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind);
-
-/**
- * @brief Copies data between host and device asynchronously.
- *
- * @param[in] dst Destination memory address
- * @param[in] dpitch Pitch of destination memory
- * @param[in] src Source memory address
- * @param[in] wOffset Source starting X offset
- * @param[in] hOffset Source starting Y offset
- * @param[in] width Width of matrix transfer (columns in bytes)
- * @param[in] height Height of matrix transfer (rows)
- * @param[in] kind Type of transfer
- * @param[in] stream Accelerator view which the copy is being enqueued
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0));
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dst Destination memory address
- * @param[in] srcArray Source array
- * @param[in] srcOffset Offset in bytes of source array
- * @param[in] count Size of memory copy in bytes
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count);
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] dstArray Destination memory address
- * @param[in] dstOffset Offset in bytes of destination array
- * @param[in] srcHost Source host pointer
- * @param[in] count Size of memory copy in bytes
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count);
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] p 3D memory copy parameters
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p);
-
-/**
- * @brief Copies data between host and device asynchronously.
- *
- * @param[in] p 3D memory copy parameters
- * @param[in] stream Stream to use
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0));
-
-/**
- * @brief Copies data between host and device.
- *
- * @param[in] pCopy 3D memory copy parameters
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy);
-
-/**
- * @brief Copies data between host and device asynchronously.
- *
- * @param[in] pCopy 3D memory copy parameters
- * @param[in] stream Stream to use
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream);
-
-// doxygen end Memory
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup PeerToPeer PeerToPeer Device Memory Access
- * @{
- * @warning PeerToPeer support is experimental.
- * This section describes the PeerToPeer device memory access functions of HIP runtime API.
- */
-
-/**
- * @brief Determine if a device can access a peer's memory.
- *
- * @param [out] canAccessPeer Returns the peer access capability (0 or 1)
- * @param [in] deviceId - device from where memory may be accessed.
- * @param [in] peerDeviceId - device where memory is physically located
- *
- * Returns "1" in @p canAccessPeer if the specified @p deviceId is capable
- * of directly accessing memory physically located on @p peerDeviceId, or "0" if not.
- *
- * Returns "0" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a
- * device is not a peer of itself.
- *
- * @returns #hipSuccess,
- * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices
- */
-hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId);
-
-
-/**
- * @brief Enable direct access from current device's virtual address space to memory allocations
- * physically located on a peer device.
- *
- * Memory which is already allocated on the peer device will be mapped into the address space of the
- * current device. In addition, all future memory allocations on peerDeviceId will be mapped into
- * the address space of the current device when the memory is allocated. The peer memory remains
- * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.
- *
- *
- * @param [in] peerDeviceId
- * @param [in] flags
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,
- * #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device.
- */
-hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags);
-
-
-/**
- * @brief Disable direct access from current device's virtual address space to memory allocations
- * physically located on a peer device.
- *
- * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been
- * enabled from the current device.
- *
- * @param [in] peerDeviceId
- *
- * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled
- */
-hipError_t hipDeviceDisablePeerAccess(int peerDeviceId);
-
-/**
- * @brief Get information on memory allocations.
- *
- * @param [out] pbase - Base pointer address
- * @param [out] psize - Size of allocation
- * @param [in] dptr - Device pointer
- *
- * @returns #hipSuccess, #hipErrorInvalidDevicePointer
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr);
-
-#ifndef USE_PEER_NON_UNIFIED
-#define USE_PEER_NON_UNIFIED 1
-#endif
-
-#if USE_PEER_NON_UNIFIED == 1
-/**
- * @brief Copies memory from one device to memory on another device.
- *
- * @param [out] dst - Destination device pointer.
- * @param [in] dstDeviceId - Destination device
- * @param [in] src - Source device pointer
- * @param [in] srcDeviceId - Source device
- * @param [in] sizeBytes - Size of memory copy in bytes
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
- */
-hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId,
- size_t sizeBytes);
-
-/**
- * @brief Copies memory from one device to memory on another device.
- *
- * @param [out] dst - Destination device pointer.
- * @param [in] dstDeviceId - Destination device
- * @param [in] src - Source device pointer
- * @param [in] srcDevice - Source device
- * @param [in] sizeBytes - Size of memory copy in bytes
- * @param [in] stream - Stream identifier
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
- */
-hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice,
- size_t sizeBytes, hipStream_t stream __dparm(0));
-#endif
-
-
-// doxygen end PeerToPeer
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup Context Context Management
- * @{
- * This section describes the context management functions of HIP runtime API.
- */
-
-/**
- *
- * @addtogroup ContextD Context Management [Deprecated]
- * @{
- * @ingroup Context
- * This section describes the deprecated context management functions of HIP runtime API.
- */
-
-/**
- * @brief Create a context and set it as current/ default context
- *
- * @param [out] ctx
- * @param [in] flags
- * @param [in] device associated device handle
- *
- * @return #hipSuccess
- *
- * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent,
- * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device);
-
-/**
- * @brief Destroy a HIP context.
- *
- * @param [in] ctx Context to destroy
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- *
- * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxDestroy(hipCtx_t ctx);
-
-/**
- * @brief Pop the current/default context and return the popped context.
- *
- * @param [out] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxPopCurrent(hipCtx_t* ctx);
-
-/**
- * @brief Push the context to be set as current/ default context
- *
- * @param [in] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxPushCurrent(hipCtx_t ctx);
-
-/**
- * @brief Set the passed context as current/default
- *
- * @param [in] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSetCurrent(hipCtx_t ctx);
-
-/**
- * @brief Get the handle of the current/ default context
- *
- * @param [out] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetCurrent(hipCtx_t* ctx);
-
-/**
- * @brief Get the handle of the device associated with current/default context
- *
- * @param [out] device
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize
- */
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetDevice(hipDevice_t* device);
-
-/**
- * @brief Returns the approximate HIP api version.
- *
- * @param [in] ctx Context to check
- * @param [out] apiVersion
- *
- * @return #hipSuccess
- *
- * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision.
- * This function always set *apiVersion to 4 as an approximation though HIP supports
- * some features which were introduced in later CUDA SDK revisions.
- * HIP apps code should not rely on the api revision number here and should
- * use arch feature flags to test device capabilities or conditional compilation.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion);
-
-/**
- * @brief Get Cache configuration for a specific function
- *
- * @param [out] cacheConfig
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig);
-
-/**
- * @brief Set L1/Shared cache partition.
- *
- * @param [in] cacheConfig
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig);
-
-/**
- * @brief Set Shared memory bank configuration.
- *
- * @param [in] config
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config);
-
-/**
- * @brief Get Shared memory bank configuration.
- *
- * @param [out] pConfig
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig);
-
-/**
- * @brief Blocks until the default context has completed all preceding requested tasks.
- *
- * @return #hipSuccess
- *
- * @warning This function waits for all streams on the default context to complete execution, and
- * then returns.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSynchronize(void);
-
-/**
- * @brief Return flags used for creating default context.
- *
- * @param [out] flags
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetFlags(unsigned int* flags);
-
-/**
- * @brief Enables direct access to memory allocations in a peer context.
- *
- * Memory which already allocated on peer device will be mapped into the address space of the
- * current device. In addition, all future memory allocations on peerDeviceId will be mapped into
- * the address space of the current device when the memory is allocated. The peer memory remains
- * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.
- *
- *
- * @param [in] peerCtx
- * @param [in] flags
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,
- * #hipErrorPeerAccessAlreadyEnabled
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- * @warning PeerToPeer support is experimental.
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags);
-
-/**
- * @brief Disable direct access from current context's virtual address space to memory allocations
- * physically located on a peer context. Disables direct access to memory allocations in a peer
- * context and unregisters any registered allocations.
- *
- * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been
- * enabled from the current device.
- *
- * @param [in] peerCtx
- *
- * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- * @warning PeerToPeer support is experimental.
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx);
-
-// doxygen end Context deprecated
-/**
- * @}
- */
-
-/**
- * @brief Get the state of the primary context.
- *
- * @param [in] dev Device to get primary context flags for
- * @param [out] flags Pointer to store flags
- * @param [out] active Pointer to store context state; 0 = inactive, 1 = active
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active);
-
-/**
- * @brief Release the primary context on the GPU.
- *
- * @param [in] dev Device which primary context is released
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- * @warning This function returns #hipSuccess though doesn't release the primaryCtx by design on
- * HIP/HCC path.
- */
-hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev);
-
-/**
- * @brief Retain the primary context on the GPU.
- *
- * @param [out] pctx Returned context handle of the new context
- * @param [in] dev Device which primary context is retained
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev);
-
-/**
- * @brief Resets the primary context on the GPU.
- *
- * @param [in] dev Device which primary context is reset
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev);
-
-/**
- * @brief Set flags for the primary context.
- *
- * @param [in] dev Device for which the primary context flags are set
- * @param [in] flags New flags for the device
- *
- * @returns #hipSuccess, #hipErrorContextAlreadyInUse
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags);
-
-// doxygen end Context Management
-/**
- * @}
- */
-
-/**
- *
- * @defgroup Module Module Management
- * @{
- * This section describes the module management functions of HIP runtime API.
- *
- */
-
-/**
- * @brief Loads code object from file into a hipModule_t
- *
- * @param [in] fname
- * @param [out] module
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound,
- * hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized
- *
- *
- */
-hipError_t hipModuleLoad(hipModule_t* module, const char* fname);
-
-/**
- * @brief Frees the module
- *
- * @param [in] module
- *
- * @returns hipSuccess, hipErrorInvalidValue
- * module is freed and the code objects associated with it are destroyed
- *
- */
-
-hipError_t hipModuleUnload(hipModule_t module);
-
-/**
- * @brief Function with kname will be extracted if present in module
- *
- * @param [in] module
- * @param [in] kname
- * @param [out] function
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized,
- * hipErrorNotFound,
- */
-hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname);
-
-/**
- * @brief Find out attributes for a given function.
- *
- * @param [out] attr
- * @param [in] func
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDeviceFunction
- */
-
-hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func);
-
-/**
- * @brief Find out a specific attribute for a given function.
- *
- * @param [out] value
- * @param [in] attrib
- * @param [in] hfunc
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDeviceFunction
- */
-hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc);
-
-#if !__HIP_ROCclr__
-#if defined(__cplusplus)
-} // extern "C"
-#endif
-
-#ifdef __cplusplus
-namespace hip_impl {
- class agent_globals_impl; // Forward declaration only; the class body is not visible in this part of the header.
- class agent_globals {
- public:
- agent_globals();
- ~agent_globals();
- agent_globals(const agent_globals&) = delete; // Copy construction is disabled.
-
- hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes,
- hipModule_t hmod, const char* name);
- hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
- const char* name);
- private:
- agent_globals_impl* impl; // Pointer to the forward-declared implementation type.
- };
-
- inline
- __attribute__((visibility("hidden")))
- agent_globals& get_agent_globals() { // Returns a reference to one function-local static instance.
- static agent_globals ag;
- return ag;
- }
-
- extern "C"
- inline
- __attribute__((visibility("hidden")))
- hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
- const char* name) { // Free-function wrapper that forwards to the instance from get_agent_globals().
- return get_agent_globals().read_agent_global_from_process(dptr, bytes, name);
- }
-} // Namespace hip_impl.
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-/**
- * @brief returns device memory pointer and size of the kernel present in the module with symbol @p
- * name
- *
- * @param [out] dptr
- * @param [out] bytes
- * @param [in] hmod
- * @param [in] name
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorNotInitialized
- */
-hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
- hipModule_t hmod, const char* name);
-#endif // __HIP_ROCclr__
-
-/**
- * @brief returns the handle of the texture reference with the name from the module.
- *
- * @param [in] hmod
- * @param [in] name
- * @param [out] texRef
- *
- * @returns hipSuccess, hipErrorNotInitialized, hipErrorNotFound, hipErrorInvalidValue
- */
-hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name);
-
-/**
- * @brief builds module from code object which resides in host memory. Image is pointer to that
- * location.
- *
- * @param [in] image
- * @param [out] module
- *
- * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized
- */
-hipError_t hipModuleLoadData(hipModule_t* module, const void* image);
-
-/**
- * @brief builds module from code object which resides in host memory. Image is pointer to that
- * location. Options are not used. hipModuleLoadData is called.
- *
- * @param [in] image
- * @param [out] module
- * @param [in] numOptions number of options
- * @param [in] options options for JIT
- * @param [in] optionValues option values for JIT
- *
- * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized
- */
-hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
- hipJitOption* options, void** optionValues);
-
-/**
- * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
- * to kernelparams or extra
- *
- * @param [in] f Kernel to launch.
- * @param [in] gridDimX X grid dimension specified as multiple of blockDimX.
- * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY.
- * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ.
- * @param [in] blockDimX X block dimensions specified in work-items
- * @param [in] blockDimY Y block dimension specified in work-items
- * @param [in] blockDimZ Z block dimension specified in work-items
- * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
- * default stream is used with associated synchronization rules.
- * @param [in] kernelParams
- * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and
- * must be in the memory layout and alignment expected by the kernel.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- * @warning kernelParams argument is not yet implemented in HIP. Please use extra instead. Please
- * refer to hip_porting_driver_api.md for sample usage.
- */
-hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY,
- unsigned int gridDimZ, unsigned int blockDimX,
- unsigned int blockDimY, unsigned int blockDimZ,
- unsigned int sharedMemBytes, hipStream_t stream,
- void** kernelParams, void** extra);
-
-
-#if __HIP_ROCclr__ && !defined(__HCC__)
-/**
- * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
- * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute
- *
- * @param [in] f Kernel to launch.
- * @param [in] gridDim Grid dimensions specified as multiple of blockDim.
- * @param [in] blockDim Block dimensions specified in work-items
- * @param [in] kernelParams A list of kernel arguments
- * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge
- */
-hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX,
- void** kernelParams, unsigned int sharedMemBytes,
- hipStream_t stream);
-
-/**
- * @brief Launches kernels on multiple devices where thread blocks can cooperate and
- * synchronize as they execute.
- *
- * @param [in] launchParamsList List of launch parameters, one per device.
- * @param [in] numDevices Size of the launchParamsList array.
- * @param [in] flags Flags to control launch behavior.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge
- */
-hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
- int numDevices, unsigned int flags);
-
-#endif
-
-/**
- * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched
- * on respective streams before enqueuing any other work on the specified streams from any other threads
- *
- *
- * @param [in] launchParamsList List of launch parameters, one per device.
- * @param [in] numDevices Size of the launchParamsList array.
- * @param [in] flags Flags to control launch behavior.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- */
-hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
- int numDevices, unsigned int flags);
-
-
-// doxygen end Module
-/**
- * @}
- */
-
-/**
- *
- * @defgroup Occupancy Occupancy
- * @{
- * This section describes the occupancy functions of HIP runtime API.
- *
- */
-
-/**
- * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel
- *
- * @param [out] gridSize minimum grid size for maximum potential occupancy
- * @param [out] blockSize block size for maximum potential occupancy
- * @param [in] f kernel function for which occupancy is calculated
- * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue
- */
-
-//TODO - Match CUoccupancyB2DSize
-hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
- hipFunction_t f, size_t dynSharedMemPerBlk,
- int blockSizeLimit);
-
-/**
- * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel
- *
- * @param [out] gridSize minimum grid size for maximum potential occupancy
- * @param [out] blockSize block size for maximum potential occupancy
- * @param [in] f kernel function for which occupancy is calculated
- * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit
- * @param [in] flags Extra flags for occupancy calculation (only default supported)
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue
- */
-//TODO - Match CUoccupancyB2DSize
-hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
- hipFunction_t f, size_t dynSharedMemPerBlk,
- int blockSizeLimit, unsigned int flags);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks Returned occupancy
- * @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
- * @param [in] blockSize Block size the kernel is intended to be launched with
- * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- */
-hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
- int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks Returned occupancy
- * @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
- * @param [in] blockSize Block size the kernel is intended to be launched with
- * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in] flags Extra flags for occupancy calculation (only default supported)
- */
-hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
- int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks Returned occupancy
- * @param [in] f Kernel function for which occupancy is calculated
- * @param [in] blockSize Block size the kernel is intended to be launched with
- * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- */
-hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
- int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks Returned occupancy
- * @param [in] f Kernel function for which occupancy is calculated
- * @param [in] blockSize Block size the kernel is intended to be launched with
- * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in] flags Extra flags for occupancy calculation (currently ignored)
- */
-hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
- int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault));
-
-/**
- * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel
- *
- * @param [out] gridSize minimum grid size for maximum potential occupancy
- * @param [out] blockSize block size for maximum potential occupancy
- * @param [in] f kernel function for which occupancy is calculated
- * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue
- */
-hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
- const void* f, size_t dynSharedMemPerBlk,
- int blockSizeLimit);
-
-// doxygen end Occupancy
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup Profiler Profiler Control[Deprecated]
- * @{
- * This section describes the profiler control functions of HIP runtime API.
- *
- * @warning The cudaProfilerInitialize API format for "configFile" is not supported.
- *
- */
-
-
-// TODO - expand descriptions:
-/**
- * @brief Start recording of profiling information
- * When using this API, start the profiler with profiling disabled. (--startdisabled)
- * @warning : hipProfilerStart API is under development.
- */
-DEPRECATED("use roctracer/rocTX instead")
-hipError_t hipProfilerStart();
-
-
-/**
- * @brief Stop recording of profiling information.
- * When using this API, start the profiler with profiling disabled. (--startdisabled)
- * @warning : hipProfilerStop API is under development.
- */
-DEPRECATED("use roctracer/rocTX instead")
-hipError_t hipProfilerStop();
-
-// doxygen end profiler
-/**
- * @}
- */
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup Clang Launch API to support the triple-chevron syntax
- * @{
- * This section describes the API to support the triple-chevron syntax.
- */
-
-/**
- * @brief Configure a kernel launch.
- *
- * @param [in] gridDim grid dimension specified as multiple of blockDim.
- * @param [in] blockDim block dimensions specified in work-items
- * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), hipStream_t stream __dparm(0));
-
-
-/**
- * @brief Set a kernel argument.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- * @param [in] arg Pointer to the argument in host memory.
- * @param [in] size Size of the argument.
- * @param [in] offset Offset of the argument on the argument stack.
- *
- */
-hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset);
-
-
-/**
- * @brief Launch a kernel.
- *
- * @param [in] func Kernel to launch.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-hipError_t hipLaunchByPtr(const void* func);
-
-
-/**
- * @brief Push configuration of a kernel launch.
- *
- * @param [in] gridDim grid dimension specified as multiple of blockDim.
- * @param [in] blockDim block dimensions specified in work-items
- * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-
-hipError_t __hipPushCallConfiguration(dim3 gridDim,
- dim3 blockDim,
- size_t sharedMem __dparm(0),
- hipStream_t stream __dparm(0));
-
-/**
- * @brief Pop configuration of a kernel launch.
- *
- * @param [out] gridDim grid dimension specified as multiple of blockDim.
- * @param [out] blockDim block dimensions specified in work-items
- * @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel. The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-hipError_t __hipPopCallConfiguration(dim3 *gridDim,
- dim3 *blockDim,
- size_t *sharedMem,
- hipStream_t *stream);
-
-/**
- * @brief C compliant kernel launch API
- *
- * @param [in] function_address - kernel stub function pointer.
- * @param [in] numBlocks - number of blocks
- * @param [in] dimBlocks - dimension of a block
- * @param [in] args - kernel arguments
- * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The
- * Kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, hipInvalidDevice
- *
- */
-hipError_t hipLaunchKernel(const void* function_address,
- dim3 numBlocks,
- dim3 dimBlocks,
- void** args,
- size_t sharedMemBytes __dparm(0),
- hipStream_t stream __dparm(0));
-
-#if __HIP_ROCclr__ || !defined(__HCC__)
-//TODO: Move this to hip_ext.h
-hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks,
- void** args, size_t sharedMemBytes, hipStream_t stream,
- hipEvent_t startEvent, hipEvent_t stopEvent, int flags);
-// doxygen end Clang launch
-/**
- * @}
- */
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(
- size_t* offset,
- const textureReference* tex,
- const void* devPtr,
- const hipChannelFormatDesc* desc,
- size_t size __dparm(UINT_MAX));
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(
- size_t* offset,
- const textureReference* tex,
- const void* devPtr,
- const hipChannelFormatDesc* desc,
- size_t width,
- size_t height,
- size_t pitch);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(
- const textureReference* tex,
- hipArray_const_t array,
- const hipChannelFormatDesc* desc);
-
-hipError_t hipBindTextureToMipmappedArray(
- const textureReference* tex,
- hipMipmappedArray_const_t mipmappedArray,
- const hipChannelFormatDesc* desc);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipGetTextureAlignmentOffset(
- size_t* offset,
- const textureReference* texref);
-
-hipError_t hipGetTextureReference(
- const textureReference** texref,
- const void* symbol);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipUnbindTexture(const textureReference* tex);
-
-hipError_t hipCreateTextureObject(
- hipTextureObject_t* pTexObject,
- const hipResourceDesc* pResDesc,
- const hipTextureDesc* pTexDesc,
- const struct hipResourceViewDesc* pResViewDesc);
-
-hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
-
-hipError_t hipGetChannelDesc(
- hipChannelFormatDesc* desc,
- hipArray_const_t array);
-
-hipError_t hipGetTextureObjectResourceDesc(
- hipResourceDesc* pResDesc,
- hipTextureObject_t textureObject);
-
-hipError_t hipGetTextureObjectResourceViewDesc(
- struct hipResourceViewDesc* pResViewDesc,
- hipTextureObject_t textureObject);
-
-hipError_t hipGetTextureObjectTextureDesc(
- hipTextureDesc* pTexDesc,
- hipTextureObject_t textureObject);
-
-hipError_t hipTexRefGetAddress(
- hipDeviceptr_t* dev_ptr,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetAddressMode(
- enum hipTextureAddressMode* pam,
- const textureReference* texRef,
- int dim);
-
-hipError_t hipTexRefGetFilterMode(
- enum hipTextureFilterMode* pfm,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetFlags(
- unsigned int* pFlags,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetFormat(
- hipArray_Format* pFormat,
- int* pNumChannels,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetMaxAnisotropy(
- int* pmaxAnsio,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetMipmapFilterMode(
- enum hipTextureFilterMode* pfm,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetMipmapLevelBias(
- float* pbias,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetMipmapLevelClamp(
- float* pminMipmapLevelClamp,
- float* pmaxMipmapLevelClamp,
- const textureReference* texRef);
-
-hipError_t hipTexRefGetMipMappedArray(
- hipMipmappedArray_t* pArray,
- const textureReference* texRef);
-
-hipError_t hipTexRefSetAddress(
- size_t* ByteOffset,
- textureReference* texRef,
- hipDeviceptr_t dptr,
- size_t bytes);
-
-hipError_t hipTexRefSetAddress2D(
- textureReference* texRef,
- const HIP_ARRAY_DESCRIPTOR* desc,
- hipDeviceptr_t dptr,
- size_t Pitch);
-
-hipError_t hipTexRefSetAddressMode(
- textureReference* texRef,
- int dim,
- enum hipTextureAddressMode am);
-
-hipError_t hipTexRefSetArray(
- textureReference* tex,
- hipArray_const_t array,
- unsigned int flags);
-
-hipError_t hipTexRefSetBorderColor(
- textureReference* texRef,
- float* pBorderColor);
-
-hipError_t hipTexRefSetFilterMode(
- textureReference* texRef,
- enum hipTextureFilterMode fm);
-
-hipError_t hipTexRefSetFlags(
- textureReference* texRef,
- unsigned int Flags);
-
-hipError_t hipTexRefSetFormat(
- textureReference* texRef,
- hipArray_Format fmt,
- int NumPackedComponents);
-
-hipError_t hipTexRefSetMaxAnisotropy(
- textureReference* texRef,
- unsigned int maxAniso);
-
-hipError_t hipTexRefSetMipmapFilterMode(
- textureReference* texRef,
- enum hipTextureFilterMode fm);
-
-hipError_t hipTexRefSetMipmapLevelBias(
- textureReference* texRef,
- float bias);
-
-hipError_t hipTexRefSetMipmapLevelClamp(
- textureReference* texRef,
- float minMipMapLevelClamp,
- float maxMipMapLevelClamp);
-
-hipError_t hipTexRefSetMipmappedArray(
- textureReference* texRef,
- struct hipMipmappedArray* mipmappedArray,
- unsigned int Flags);
-
-hipError_t hipMipmappedArrayCreate(
- hipMipmappedArray_t* pHandle,
- HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
- unsigned int numMipmapLevels);
-
-hipError_t hipMipmappedArrayDestroy(
- hipMipmappedArray_t hMipmappedArray);
-
-hipError_t hipMipmappedArrayGetLevel(
- hipArray_t* pLevelArray,
- hipMipmappedArray_t hMipMappedArray,
- unsigned int level);
-
-hipError_t hipTexObjectCreate(
- hipTextureObject_t* pTexObject,
- const HIP_RESOURCE_DESC* pResDesc,
- const HIP_TEXTURE_DESC* pTexDesc,
- const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
-
-hipError_t hipTexObjectDestroy(
- hipTextureObject_t texObject);
-
-hipError_t hipTexObjectGetResourceDesc(
- HIP_RESOURCE_DESC* pResDesc,
- hipTextureObject_t texObject);
-
-hipError_t hipTexObjectGetResourceViewDesc(
- HIP_RESOURCE_VIEW_DESC* pResViewDesc,
- hipTextureObject_t texObject);
-
-hipError_t hipTexObjectGetTextureDesc(
- HIP_TEXTURE_DESC* pTexDesc,
- hipTextureObject_t texObject);
-#endif
-
-/**
- * @}
- */
-
-
-#ifdef __cplusplus
-} /* extern "c" */
-#endif
-
-#if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
-template <typename T>
-static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
- T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) {
- return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast<const void*>(f),dynSharedMemPerBlk,blockSizeLimit);
-}
-
-template <typename T>
-static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
- T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) {
- return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast<const void*>(f),dynSharedMemPerBlk,blockSizeLimit);
-}
-#endif // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
-
-#if defined(__cplusplus) && !defined(__HCC__)
-
-template <typename T>
-hipError_t hipGetSymbolAddress(void** devPtr, const T &symbol) {
- return ::hipGetSymbolAddress(devPtr, (const void *)&symbol);
-}
-
-template <typename T>
-hipError_t hipGetSymbolSize(size_t* size, const T &symbol) {
- return ::hipGetSymbolSize(size, (const void *)&symbol);
-}
-
-template <typename T>
-hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes,
- size_t offset __dparm(0),
- hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) {
- return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind);
-}
-
-template <typename T>
-hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset,
- hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
- return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream);
-}
-
-template <typename T>
-hipError_t hipMemcpyFromSymbol(void* dst, const T &symbol,
- size_t sizeBytes, size_t offset __dparm(0),
- hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
- return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind);
-}
-
-template <typename T>
-hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset,
- hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
- return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream);
-}
-
-#endif
-
-#if USE_PROF_API
-#include <hip/hcc_detail/hip_prof_str.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-/**
- * Callback/Activity API
- */
-hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg);
-hipError_t hipRemoveApiCallback(uint32_t id);
-hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg);
-hipError_t hipRemoveActivityCallback(uint32_t id);
-const char* hipApiName(uint32_t id);
-const char* hipKernelNameRef(const hipFunction_t f);
-const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream);
-int hipGetStreamDeviceId(hipStream_t stream);
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#ifdef __cplusplus
-
-template <class T>
-inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
- int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk) {
- return hipOccupancyMaxActiveBlocksPerMultiprocessor(
- numBlocks, reinterpret_cast<const void*>(f), blockSize, dynSharedMemPerBlk);
-}
-
-template <class T>
-inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
- int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) {
- return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
- numBlocks, reinterpret_cast<const void*>(f), blockSize, dynSharedMemPerBlk, flags);
-}
-
-class TlsData;
-
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
- const hipChannelFormatDesc* desc, size_t size = UINT_MAX);
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
- const void* devPtr, const struct hipChannelFormatDesc* desc,
- size_t size, textureReference* tex);
-#endif
-
-/*
- * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
- *reference tex.
- *
- * @p desc describes how the memory is interpreted when fetching values from the texture. The @p
- *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any
- *memory previously bound to tex is unbound.
- *
- * @param[in] offset - Offset in bytes
- * @param[out] tex - texture to bind
- * @param[in] devPtr - Memory area on device
- * @param[in] desc - Channel format
- * @param[in] size - Size of the memory area pointed to by devPtr
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- **/
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
- const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
- return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex);
-}
-#endif
-
-/*
- * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
- *reference tex.
- *
- * @p desc describes how the memory is interpreted when fetching values from the texture. The @p
- *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any
- *memory previously bound to tex is unbound.
- *
- * @param[in] offset - Offset in bytes
- * @param[in] tex - texture to bind
- * @param[in] devPtr - Memory area on device
- * @param[in] size - Size of the memory area pointed to by devPtr
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- **/
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
- size_t size = UINT_MAX) {
- return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex);
-}
-#endif
-
-// C API
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
- const hipChannelFormatDesc* desc, size_t width, size_t height,
- size_t pitch);
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
- const void* devPtr, const struct hipChannelFormatDesc* desc,
- size_t width, size_t height, textureReference* tex, size_t pitch);
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
- const void* devPtr, size_t width, size_t height, size_t pitch) {
- return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height,
- &tex);
-}
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
- const void* devPtr, const struct hipChannelFormatDesc& desc,
- size_t width, size_t height, size_t pitch) {
- return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex);
-}
-#endif
-
-// C API
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
- const hipChannelFormatDesc* desc);
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode,
- hipArray_const_t array,
- const struct hipChannelFormatDesc& desc,
- textureReference* tex);
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
- return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex);
-}
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array,
- const struct hipChannelFormatDesc& desc) {
- return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex);
-}
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
- hipArray_const_t array,
- const struct hipChannelFormatDesc* desc) {
- return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex);
-}
-#endif
-
-// C API
-#if !__HIP_ROCclr__
-hipError_t hipBindTextureToMipmappedArray(const textureReference* tex,
- hipMipmappedArray_const_t mipmappedArray,
- const hipChannelFormatDesc* desc);
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
- hipMipmappedArray_const_t mipmappedArray) {
- return hipSuccess;
-}
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
- hipMipmappedArray_const_t mipmappedArray,
- const hipChannelFormatDesc& desc) {
- return hipSuccess;
-}
-#endif
-
-#if __HIP_ROCclr__ && !defined(__HCC__)
-
-template <typename F>
-inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
- F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) {
-return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize,(hipFunction_t)kernel, dynSharedMemPerBlk, blockSizeLimit);
-}
-
-template <class T>
-inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
- void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
- return hipLaunchCooperativeKernel(reinterpret_cast<const void*>(f), gridDim,
- blockDim, kernelParams, sharedMemBytes, stream);
-}
-
-template <class T>
-inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
- unsigned int numDevices, unsigned int flags = 0) {
- return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags);
-}
-
-
-template <class T>
-inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
- unsigned int numDevices, unsigned int flags = 0) {
- return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags);
-}
-
-#endif
-
-/*
- * @brief Unbinds the textuer bound to @p tex
- *
- * @param[in] tex - texture to unbind
- *
- * @return #hipSuccess
- **/
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipUnbindTexture(const textureReference* tex);
-#endif
-
-#if !__HIP_ROCclr__
-extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject);
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
- return ihipUnbindTextureImpl(tex.textureObject);
-}
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref);
-
-hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol);
-
-hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc,
- const hipTextureDesc* pTexDesc,
- const hipResourceViewDesc* pResViewDesc);
-
-hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
-
-hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
- hipTextureObject_t textureObject);
-hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc,
- hipTextureObject_t textureObject);
-hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
- hipTextureObject_t textureObject);
-hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags);
-
-hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex);
-
-hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am);
-
-hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim);
-
-hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm);
-
-hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags);
-
-hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents);
-
-hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr,
- size_t size);
-
-hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex);
-
-hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
- hipDeviceptr_t devPtr, size_t pitch);
-#endif
-
-hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc);
-
-hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject);
-
-#if __HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
- const void* devPtr, size_t size = UINT_MAX) {
- return hipBindTexture(offset, &tex, devPtr, &tex.channelDesc, size);
-}
-
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t
- hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex, const void* devPtr,
- const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
- return hipBindTexture(offset, &tex, devPtr, &desc, size);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTexture2D(
- size_t *offset,
- const struct texture<T, dim, readMode> &tex,
- const void *devPtr,
- size_t width,
- size_t height,
- size_t pitch)
-{
- return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTexture2D(
- size_t *offset,
- const struct texture<T, dim, readMode> &tex,
- const void *devPtr,
- const struct hipChannelFormatDesc &desc,
- size_t width,
- size_t height,
- size_t pitch)
-{
- return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTextureToArray(
- const struct texture<T, dim, readMode> &tex,
- hipArray_const_t array)
-{
- struct hipChannelFormatDesc desc;
- hipError_t err = hipGetChannelDesc(&desc, array);
- return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, &desc) : err;
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTextureToArray(
- const struct texture<T, dim, readMode> &tex,
- hipArray_const_t array,
- const struct hipChannelFormatDesc &desc)
-{
- return hipBindTextureToArray(&tex, array, &desc);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-static inline hipError_t hipBindTextureToMipmappedArray(
- const struct texture<T, dim, readMode> &tex,
- hipMipmappedArray_const_t mipmappedArray)
-{
- struct hipChannelFormatDesc desc;
- hipArray_t levelArray;
- hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0);
- if (err != hipSuccess) {
- return err;
- }
- err = hipGetChannelDesc(&desc, levelArray);
- return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc) : err;
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-static inline hipError_t hipBindTextureToMipmappedArray(
- const struct texture<T, dim, readMode> &tex,
- hipMipmappedArray_const_t mipmappedArray,
- const struct hipChannelFormatDesc &desc)
-{
- return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipUnbindTexture(
- const struct texture<T, dim, readMode> &tex)
-{
- return hipUnbindTexture(&tex);
-}
-#endif
-
-// doxygen end Texture
-/**
- * @}
- */
-
-
-#endif
-
-#ifdef __GNUC__
-#pragma GCC visibility pop
-#endif
-
-// doxygen end HIP API
-/**
- * @}
- */
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_runtime_prof.h b/third_party/rocm/include/hip/hcc_detail/hip_runtime_prof.h
deleted file mode 100644
index ffd8b0a..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_runtime_prof.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
-Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H
-
-// HIP ROCclr Op IDs enumeration
-enum HipVdiOpId {
- kHipVdiOpIdDispatch = 0,
- kHipVdiOpIdCopy = 1,
- kHipVdiOpIdBarrier = 2,
- kHipVdiOpIdNumber = 3
-};
-
-// Types of ROCclr commands
-enum HipVdiCommandKind {
- kHipVdiCommandKernel = 0x11F0,
- kHipVdiMemcpyDeviceToHost = 0x11F3,
- kHipHipVdiMemcpyHostToDevice = 0x11F4,
- kHipVdiMemcpyDeviceToDevice = 0x11F5,
- kHipVidMemcpyDeviceToHostRect = 0x1201,
- kHipVdiMemcpyHostToDeviceRect = 0x1202,
- kHipVdiMemcpyDeviceToDeviceRect = 0x1203,
- kHipVdiFillMemory = 0x1207,
-};
-
-/**
- * @brief Initializes activity callback
- *
- * @param [input] id_callback Event ID callback function
- * @param [input] op_callback Event operation callback function
- * @param [input] arg Arguments passed into callback
- *
- * @returns None
- */
-void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg);
-
-/**
- * @brief Enables activity callback
- *
- * @param [input] op Operation, which will trigger a callback (@see HipVdiOpId)
- * @param [input] enable Enable state for the callback
- *
- * @returns True if successful
- */
-bool hipEnableActivityCallback(uint32_t op, bool enable);
-
-/**
- * @brief Returns the description string for the operation kind
- *
- * @param [input] id Command kind id (@see HipVdiCommandKind)
- *
- * @returns A pointer to a const string with the command description
- */
-const char* hipGetCmdName(uint32_t id);
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H
-
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_surface_types.h b/third_party/rocm/include/hip/hcc_detail/hip_surface_types.h
deleted file mode 100644
index f74c01d..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_surface_types.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-Copyright (c) 2015- present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/hip_surface_types.h
- * @brief Defines surface types for HIP runtime.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H
-
-#include <hip/hcc_detail/driver_types.h>
-
-/**
- * An opaque value that represents a hip surface object
- */
-typedef unsigned long long hipSurfaceObject_t;
-
-/**
- * hip surface reference
- */
-struct surfaceReference {
- hipSurfaceObject_t surfaceObject;
-};
-
-/**
- * hip surface boundary modes
- */
-enum hipSurfaceBoundaryMode {
- hipBoundaryModeZero = 0,
- hipBoundaryModeTrap = 1,
- hipBoundaryModeClamp = 2
-};
-
-#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H */
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_texture_types.h b/third_party/rocm/include/hip/hcc_detail/hip_texture_types.h
deleted file mode 100644
index a46b236..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_texture_types.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
-Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/hip_texture_types.h
- * @brief Defines the different newt vector types for HIP runtime.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H
-
-/*******************************************************************************
- * *
- * *
- * *
- *******************************************************************************/
-#include <limits.h>
-//#include <hip/hcc_detail/driver_types.h>
-#include <hip/hcc_detail/channel_descriptor.h>
-#include <hip/hcc_detail/texture_types.h>
-
-#if __cplusplus
-
-/*******************************************************************************
- * *
- * *
- * *
- *******************************************************************************/
-#if __HIP__
-#define __HIP_TEXTURE_ATTRIB __attribute__((device_builtin_texture_type))
-#else
-#define __HIP_TEXTURE_ATTRIB
-#endif
-
-typedef textureReference* hipTexRef;
-
-template <class T, int texType = hipTextureType1D,
- enum hipTextureReadMode mode = hipReadModeElementType>
-struct __HIP_TEXTURE_ATTRIB texture : public textureReference {
- texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint,
- enum hipTextureAddressMode aMode = hipAddressModeClamp) {
- normalized = norm;
- readMode = mode;
- filterMode = fMode;
- addressMode[0] = aMode;
- addressMode[1] = aMode;
- addressMode[2] = aMode;
- channelDesc = hipCreateChannelDesc<T>();
- sRGB = 0;
- textureObject = nullptr;
- maxAnisotropy = 0;
- mipmapLevelBias = 0;
- minMipmapLevelClamp = 0;
- maxMipmapLevelClamp = 0;
- }
-
- texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode,
- struct hipChannelFormatDesc desc) {
- normalized = norm;
- readMode = mode;
- filterMode = fMode;
- addressMode[0] = aMode;
- addressMode[1] = aMode;
- addressMode[2] = aMode;
- channelDesc = desc;
- sRGB = 0;
- textureObject = nullptr;
- maxAnisotropy = 0;
- mipmapLevelBias = 0;
- minMipmapLevelClamp = 0;
- maxMipmapLevelClamp = 0;
- }
-};
-
-#endif /* __cplusplus */
-
-#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H */
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_vector_types.h b/third_party/rocm/include/hip/hcc_detail/hip_vector_types.h
deleted file mode 100644
index 69525c5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_vector_types.h
+++ /dev/null
@@ -1,1593 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/hip_vector_types.h
- * @brief Defines the different newt vector types for HIP runtime.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
-
-#if defined(__HCC__) && (__hcc_workweek__ < 16032)
-#error("This version of HIP requires a newer version of HCC.");
-#endif
-
-#include "hip/hcc_detail/host_defines.h"
-
-#if defined(__has_attribute)
- #if __has_attribute(ext_vector_type)
- #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
- #else
- #define __NATIVE_VECTOR__(n, T) T[n]
- #endif
-
-#if defined(__cplusplus)
- #include <array>
- #include <iosfwd>
- #include <type_traits>
-
- namespace hip_impl {
- template<typename, typename, unsigned int> struct Scalar_accessor;
- } // Namespace hip_impl.
-
- namespace std {
- template<typename T, typename U, unsigned int n>
- struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
- : is_integral<T> {};
- template<typename T, typename U, unsigned int n>
- struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
- : is_floating_point<T> {};
- } // Namespace std.
-
- namespace hip_impl {
- template<typename T, typename Vector, unsigned int idx>
- struct Scalar_accessor {
- struct Address {
- const Scalar_accessor* p;
-
- __host__ __device__
- operator const T*() const noexcept {
- return &reinterpret_cast<const T*>(p)[idx];
- }
- __host__ __device__
- operator const T*() const volatile noexcept {
- return &reinterpret_cast<const T*>(p)[idx];
- }
- __host__ __device__
- operator T*() noexcept {
- return &reinterpret_cast<T*>(
- const_cast<Scalar_accessor*>(p))[idx];
- }
- __host__ __device__
- operator T*() volatile noexcept {
- return &reinterpret_cast<T*>(
- const_cast<Scalar_accessor*>(p))[idx];
- }
- };
-
- friend
- inline
- std::ostream& operator<<(std::ostream& os,
- const Scalar_accessor& x) noexcept {
- return os << x.data[idx];
- }
- friend
- inline
- std::istream& operator>>(std::istream& is,
- Scalar_accessor& x) noexcept {
- T tmp;
- is >> tmp;
- x.data[idx] = tmp;
-
- return is;
- }
-
- // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
- Vector data;
-
- __host__ __device__
- operator T() const noexcept { return data[idx]; }
- __host__ __device__
- operator T() const volatile noexcept { return data[idx]; }
-
-#ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
- // The conversions to enum are fairly ghastly, but unfortunately used in
- // some pre-existing, difficult to modify, code.
- template<
- typename U,
- typename std::enable_if<
- !std::is_same<U, T>{} &&
- std::is_enum<U>{} &&
- std::is_convertible<
- T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
- __host__ __device__
- operator U() const noexcept { return static_cast<U>(data[idx]); }
- template<
- typename U,
- typename std::enable_if<
- !std::is_same<U, T>{} &&
- std::is_enum<U>{} &&
- std::is_convertible<
- T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
- __host__ __device__
- operator U() const volatile noexcept { return static_cast<U>(data[idx]); }
-#endif
-
- __host__ __device__
- operator T&() noexcept {
- return reinterpret_cast<
- T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
- }
- __host__ __device__
- operator volatile T&() volatile noexcept {
- return reinterpret_cast<
- volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
- }
-
- __host__ __device__
- Address operator&() const noexcept { return Address{this}; }
-
- __host__ __device__
- Scalar_accessor& operator=(const Scalar_accessor& x) noexcept {
- data[idx] = x.data[idx];
-
- return *this;
- }
- __host__ __device__
- Scalar_accessor& operator=(T x) noexcept {
- data[idx] = x;
-
- return *this;
- }
- __host__ __device__
- volatile Scalar_accessor& operator=(T x) volatile noexcept {
- data[idx] = x;
-
- return *this;
- }
-
- __host__ __device__
- Scalar_accessor& operator++() noexcept {
- ++data[idx];
- return *this;
- }
- __host__ __device__
- T operator++(int) noexcept {
- auto r{data[idx]};
- ++data[idx];
- return *this;
- }
- __host__ __device__
- Scalar_accessor& operator--() noexcept {
- --data[idx];
- return *this;
- }
- __host__ __device__
- T operator--(int) noexcept {
- auto r{data[idx]};
- --data[idx];
- return *this;
- }
-
- // TODO: convertibility is too restrictive, constraint should be on
- // the operator being invocable with a value of type U.
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator+=(U x) noexcept {
- data[idx] += x;
- return *this;
- }
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator-=(U x) noexcept {
- data[idx] -= x;
- return *this;
- }
-
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator*=(U x) noexcept {
- data[idx] *= x;
- return *this;
- }
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator/=(U x) noexcept {
- data[idx] /= x;
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_convertible<U, T>{} &&
- std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator%=(U x) noexcept {
- data[idx] %= x;
- return *this;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_convertible<U, T>{} &&
- std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator>>=(U x) noexcept {
- data[idx] >>= x;
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_convertible<U, T>{} &&
- std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator<<=(U x) noexcept {
- data[idx] <<= x;
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_convertible<U, T>{} &&
- std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator&=(U x) noexcept {
- data[idx] &= x;
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_convertible<U, T>{} &&
- std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator|=(U x) noexcept {
- data[idx] |= x;
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_convertible<U, T>{} &&
- std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Scalar_accessor& operator^=(U x) noexcept {
- data[idx] ^= x;
- return *this;
- }
- };
-
- inline
- constexpr
- unsigned int next_pot(unsigned int x) {
- // Precondition: x > 1.
- return 1u << (32u - __builtin_clz(x - 1u));
- }
- } // Namespace hip_impl.
-
- template<typename T, unsigned int n> struct HIP_vector_base;
-
- template<typename T>
- struct HIP_vector_base<T, 1> {
- using Native_vec_ = __NATIVE_VECTOR__(1, T);
-
- union {
- Native_vec_ data;
-#if __HIP_CLANG_ONLY__
- struct {
- T x;
- };
-#else
- hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
-#endif
- };
-
- using value_type = T;
-
- __host__ __device__
- HIP_vector_base() = default;
- __host__ __device__
- explicit
- constexpr
- HIP_vector_base(T x) noexcept : data{x} {}
- __host__ __device__
- constexpr
- HIP_vector_base(const HIP_vector_base&) = default;
- __host__ __device__
- constexpr
- HIP_vector_base(HIP_vector_base&&) = default;
- __host__ __device__
- ~HIP_vector_base() = default;
-
- __host__ __device__
- HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
- #if __has_attribute(ext_vector_type)
- data = x.data;
- #else
- data[0] = x.data[0];
- #endif
-
- return *this;
- }
- };
-
- template<typename T>
- struct HIP_vector_base<T, 2> {
- using Native_vec_ = __NATIVE_VECTOR__(2, T);
-
- union
- #if !__has_attribute(ext_vector_type)
- alignas(hip_impl::next_pot(2 * sizeof(T)))
- #endif
- {
- Native_vec_ data;
-#if __HIP_CLANG_ONLY__
- struct {
- T x;
- T y;
- };
-#else
- hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
- hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
-#endif
- };
-
- using value_type = T;
-
- __host__ __device__
- HIP_vector_base() = default;
- __host__ __device__
- explicit
- constexpr
- HIP_vector_base(T x) noexcept : data{x, x} {}
- __host__ __device__
- constexpr
- HIP_vector_base(T x, T y) noexcept : data{x, y} {}
- __host__ __device__
- constexpr
- HIP_vector_base(const HIP_vector_base&) = default;
- __host__ __device__
- constexpr
- HIP_vector_base(HIP_vector_base&&) = default;
- __host__ __device__
- ~HIP_vector_base() = default;
-
- __host__ __device__
- HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
- #if __has_attribute(ext_vector_type)
- data = x.data;
- #else
- data[0] = x.data[0];
- data[1] = x.data[1];
- #endif
-
- return *this;
- }
- };
-
- template<typename T>
- struct HIP_vector_base<T, 3> {
- struct Native_vec_ {
- T d[3];
-
- __host__ __device__
- Native_vec_() = default;
-
- __host__ __device__
- explicit
- constexpr
- Native_vec_(T x) noexcept : d{x, x, x} {}
- __host__ __device__
- constexpr
- Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
- __host__ __device__
- constexpr
- Native_vec_(const Native_vec_&) = default;
- __host__ __device__
- constexpr
- Native_vec_(Native_vec_&&) = default;
- __host__ __device__
- ~Native_vec_() = default;
-
- __host__ __device__
- Native_vec_& operator=(const Native_vec_&) = default;
- __host__ __device__
- Native_vec_& operator=(Native_vec_&&) = default;
-
- __host__ __device__
- T& operator[](unsigned int idx) noexcept { return d[idx]; }
- __host__ __device__
- T operator[](unsigned int idx) const noexcept { return d[idx]; }
-
- __host__ __device__
- Native_vec_& operator+=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
- return *this;
- }
- __host__ __device__
- Native_vec_& operator-=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
- return *this;
- }
-
- __host__ __device__
- Native_vec_& operator*=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
- return *this;
- }
- __host__ __device__
- Native_vec_& operator/=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
- return *this;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_ operator-() const noexcept
- {
- auto r{*this};
- for (auto&& x : r.d) x = -x;
- return r;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_ operator~() const noexcept
- {
- auto r{*this};
- for (auto&& x : r.d) x = ~x;
- return r;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_& operator%=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_& operator^=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_& operator|=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_& operator&=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_& operator>>=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
- return *this;
- }
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- Native_vec_& operator<<=(const Native_vec_& x) noexcept
- {
- for (auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
- return *this;
- }
-
- using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
- __host__ __device__
- Vec3_cmp operator==(const Native_vec_& x) const noexcept
- {
- return Vec3_cmp{d[0] == x.d[0], d[1] == x.d[1], d[2] == x.d[2]};
- }
- };
-
- union {
- Native_vec_ data;
- struct {
- T x;
- T y;
- T z;
- };
- };
-
- using value_type = T;
-
- __host__ __device__
- HIP_vector_base() = default;
- __host__ __device__
- explicit
- constexpr
- HIP_vector_base(T x) noexcept : data{x, x, x} {}
- __host__ __device__
- constexpr
- HIP_vector_base(T x, T y, T z) noexcept : data{x, y, z} {}
- __host__ __device__
- constexpr
- HIP_vector_base(const HIP_vector_base&) = default;
- __host__ __device__
- constexpr
- HIP_vector_base(HIP_vector_base&&) = default;
- __host__ __device__
- ~HIP_vector_base() = default;
-
- __host__ __device__
- HIP_vector_base& operator=(const HIP_vector_base&) = default;
- __host__ __device__
- HIP_vector_base& operator=(HIP_vector_base&&) = default;
- };
-
- template<typename T>
- struct HIP_vector_base<T, 4> {
- using Native_vec_ = __NATIVE_VECTOR__(4, T);
-
- union
- #if !__has_attribute(ext_vector_type)
- alignas(hip_impl::next_pot(4 * sizeof(T)))
- #endif
- {
- Native_vec_ data;
-#if __HIP_CLANG_ONLY__
- struct {
- T x;
- T y;
- T z;
- T w;
- };
-#else
- hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
- hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
- hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
- hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
-#endif
- };
-
- using value_type = T;
-
- __host__ __device__
- HIP_vector_base() = default;
- __host__ __device__
- explicit
- constexpr
- HIP_vector_base(T x) noexcept : data{x, x, x, x} {}
- __host__ __device__
- constexpr
- HIP_vector_base(T x, T y, T z, T w) noexcept : data{x, y, z, w} {}
- __host__ __device__
- constexpr
- HIP_vector_base(const HIP_vector_base&) = default;
- __host__ __device__
- constexpr
- HIP_vector_base(HIP_vector_base&&) = default;
- __host__ __device__
- ~HIP_vector_base() = default;
-
- __host__ __device__
- HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
- #if __has_attribute(ext_vector_type)
- data = x.data;
- #else
- data[0] = x.data[0];
- data[1] = x.data[1];
- data[2] = x.data[2];
- data[3] = x.data[3];
- #endif
-
- return *this;
- }
- };
-
- template<typename T, unsigned int rank>
- struct HIP_vector_type : public HIP_vector_base<T, rank> {
- using HIP_vector_base<T, rank>::data;
- using typename HIP_vector_base<T, rank>::Native_vec_;
-
- __host__ __device__
- HIP_vector_type() = default;
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- explicit
- constexpr
- HIP_vector_type(U x) noexcept
- : HIP_vector_base<T, rank>{static_cast<T>(x)}
- {}
- template< // TODO: constrain based on type as well.
- typename... Us,
- typename std::enable_if<
- (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
- __host__ __device__
- constexpr
- HIP_vector_type(Us... xs) noexcept
- : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
- {}
- __host__ __device__
- constexpr
- HIP_vector_type(const HIP_vector_type&) = default;
- __host__ __device__
- constexpr
- HIP_vector_type(HIP_vector_type&&) = default;
- __host__ __device__
- ~HIP_vector_type() = default;
-
- __host__ __device__
- HIP_vector_type& operator=(const HIP_vector_type&) = default;
- __host__ __device__
- HIP_vector_type& operator=(HIP_vector_type&&) = default;
-
- // Operators
- __host__ __device__
- HIP_vector_type& operator++() noexcept
- {
- return *this += HIP_vector_type{1};
- }
- __host__ __device__
- HIP_vector_type operator++(int) noexcept
- {
- auto tmp(*this);
- ++*this;
- return tmp;
- }
-
- __host__ __device__
- HIP_vector_type& operator--() noexcept
- {
- return *this -= HIP_vector_type{1};
- }
- __host__ __device__
- HIP_vector_type operator--(int) noexcept
- {
- auto tmp(*this);
- --*this;
- return tmp;
- }
-
- __host__ __device__
- HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
- {
- data += x.data;
- return *this;
- }
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator+=(U x) noexcept
- {
- return *this += HIP_vector_type{x};
- }
-
- __host__ __device__
- HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
- {
- data -= x.data;
- return *this;
- }
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator-=(U x) noexcept
- {
- return *this -= HIP_vector_type{x};
- }
-
- __host__ __device__
- HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
- {
- data *= x.data;
- return *this;
- }
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator*=(U x) noexcept
- {
- return *this *= HIP_vector_type{x};
- }
-
- __host__ __device__
- HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
- {
- data /= x.data;
- return *this;
- }
- template<
- typename U,
- typename std::enable_if<
- std::is_convertible<U, T>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator/=(U x) noexcept
- {
- return *this /= HIP_vector_type{x};
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type operator-() const noexcept
- {
- auto tmp(*this);
- tmp.data = -tmp.data;
- return tmp;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type operator~() const noexcept
- {
- HIP_vector_type r{*this};
- r.data = ~r.data;
- return r;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
- {
- data %= x.data;
- return *this;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
- {
- data ^= x.data;
- return *this;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
- {
- data |= x.data;
- return *this;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
- {
- data &= x.data;
- return *this;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
- {
- data >>= x.data;
- return *this;
- }
-
- template<
- typename U = T,
- typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
- __host__ __device__
- HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
- {
- data <<= x.data;
- return *this;
- }
- };
-
- template<typename T, unsigned int n>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator+(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} += y;
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator+(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator+(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} += y;
- }
-
- template<typename T, unsigned int n>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator-(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} -= y;
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator-(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator-(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} -= y;
- }
-
- template<typename T, unsigned int n>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator*(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} *= y;
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator*(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator*(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} *= y;
- }
-
- template<typename T, unsigned int n>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator/(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} /= y;
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator/(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator/(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} /= y;
- }
-
- template<typename V>
- __host__ __device__
- inline
- constexpr
- bool _hip_any_zero(const V& x, int n) noexcept
- {
- return
- (n == -1) ? true : ((x[n] == 0) ? false : _hip_any_zero(x, n - 1));
- }
-
- template<typename T, unsigned int n>
- __host__ __device__
- inline
- constexpr
- bool operator==(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return _hip_any_zero(x.data == y.data, n - 1);
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return x == HIP_vector_type<T, n>{y};
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} == y;
- }
-
- template<typename T, unsigned int n>
- __host__ __device__
- inline
- constexpr
- bool operator!=(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return !(x == y);
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return !(x == y);
- }
- template<typename T, unsigned int n, typename U>
- __host__ __device__
- inline
- constexpr
- bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return !(x == y);
- }
-
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator%(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} %= y;
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator%(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator%(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} %= y;
- }
-
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator^(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} ^= y;
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator^(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator^(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} ^= y;
- }
-
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator|(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} |= y;
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator|(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator|(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} |= y;
- }
-
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator&(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} &= y;
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator&(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator&(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} &= y;
- }
-
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator>>(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} >>= y;
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator>>(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator>>(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} >>= y;
- }
-
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator<<(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} <<= y;
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator<<(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_arithmetic<U>::value>::type,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __host__ __device__
- inline
- constexpr
- HIP_vector_type<T, n> operator<<(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} <<= y;
- }
-
- #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
- using CUDA_name##1 = HIP_vector_type<T, 1>;\
- using CUDA_name##2 = HIP_vector_type<T, 2>;\
- using CUDA_name##3 = HIP_vector_type<T, 3>;\
- using CUDA_name##4 = HIP_vector_type<T, 4>;
-#else
- #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
- typedef struct {\
- T x;\
- } CUDA_name##1;\
- typedef struct {\
- T x;\
- T y;\
- } CUDA_name##2;\
- typedef struct {\
- T x;\
- T y;\
- T z;\
- } CUDA_name##3;\
- typedef struct {\
- T x;\
- T y;\
- T z;\
- T w;\
- } CUDA_name##4;
-#endif
-
-__MAKE_VECTOR_TYPE__(uchar, unsigned char);
-__MAKE_VECTOR_TYPE__(char, char);
-__MAKE_VECTOR_TYPE__(ushort, unsigned short);
-__MAKE_VECTOR_TYPE__(short, short);
-__MAKE_VECTOR_TYPE__(uint, unsigned int);
-__MAKE_VECTOR_TYPE__(int, int);
-__MAKE_VECTOR_TYPE__(ulong, unsigned long);
-__MAKE_VECTOR_TYPE__(long, long);
-__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
-__MAKE_VECTOR_TYPE__(longlong, long long);
-__MAKE_VECTOR_TYPE__(float, float);
-__MAKE_VECTOR_TYPE__(double, double);
-
-#ifdef __cplusplus
-#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x) { type r{x}; return r; }
-
-#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x, comp y) { type r{x, y}; return r; }
-
-#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
-
-#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x, comp y, comp z, comp w) { \
- type r{x, y, z, w}; \
- return r; \
- }
-#else
- #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x) { type r; r.x =x; return r; }
-
- #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
-
- #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
-
- #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
- static inline __device__ __host__ \
- type make_##type(comp x, comp y, comp z, comp w) { \
- type r; r.x=x; r.y=y; r.z=z; r.w=w; \
- return r; \
- }
-#endif
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
-DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
-DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
-DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
-DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
-DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
-DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
-DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
-DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
-DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
-
-DECLOP_MAKE_ONE_COMPONENT(float, float1);
-DECLOP_MAKE_TWO_COMPONENT(float, float2);
-DECLOP_MAKE_THREE_COMPONENT(float, float3);
-DECLOP_MAKE_FOUR_COMPONENT(float, float4);
-
-DECLOP_MAKE_ONE_COMPONENT(double, double1);
-DECLOP_MAKE_TWO_COMPONENT(double, double2);
-DECLOP_MAKE_THREE_COMPONENT(double, double3);
-DECLOP_MAKE_FOUR_COMPONENT(double, double4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
-DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
-DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
-DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
-DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
-DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
-DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
-#else // !defined(__has_attribute)
-
-#if defined(_MSC_VER)
-#include <mmintrin.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-#include <immintrin.h>
-
-typedef union { char data; } char1;
-typedef union { char data[2]; } char2;
-typedef union { char data[4]; } char4;
-typedef union { char4 data; } char3;
-typedef union { __m64 data; } char8;
-typedef union { __m128i data; } char16;
-
-typedef union { unsigned char data; } uchar1;
-typedef union { unsigned char data[2]; } uchar2;
-typedef union { unsigned char data[4]; } uchar4;
-typedef union { uchar4 data; } uchar3;
-typedef union { __m64 data; } uchar8;
-typedef union { __m128i data; } uchar16;
-
-typedef union { short data; } short1;
-typedef union { short data[2]; } short2;
-typedef union { __m64 data; } short4;
-typedef union { short4 data; } short3;
-typedef union { __m128i data; } short8;
-typedef union { __m128i data[2]; } short16;
-
-typedef union { unsigned short data; } ushort1;
-typedef union { unsigned short data[2]; } ushort2;
-typedef union { __m64 data; } ushort4;
-typedef union { ushort4 data; } ushort3;
-typedef union { __m128i data; } ushort8;
-typedef union { __m128i data[2]; } ushort16;
-
-typedef union { int data; } int1;
-typedef union { __m64 data; } int2;
-typedef union { __m128i data; } int4;
-typedef union { int4 data; } int3;
-typedef union { __m128i data[2]; } int8;
-typedef union { __m128i data[4];} int16;
-
-typedef union { unsigned int data; } uint1;
-typedef union { __m64 data; } uint2;
-typedef union { __m128i data; } uint4;
-typedef union { uint4 data; } uint3;
-typedef union { __m128i data[2]; } uint8;
-typedef union { __m128i data[4]; } uint16;
-
-#if !defined(_WIN64)
-typedef union { int data; } long1;
-typedef union { __m64 data; } long2;
-typedef union { __m128i data; } long4;
-typedef union { long4 data; } long3;
-typedef union { __m128i data[2]; } long8;
-typedef union { __m128i data[4]; } long16;
-
-typedef union { unsigned int data; } ulong1;
-typedef union { __m64 data; } ulong2;
-typedef union { __m128i data; } ulong4;
-typedef union { ulong4 data; } ulong3;
-typedef union { __m128i data[2]; } ulong8;
-typedef union { __m128i data[4]; } ulong16;
-#else // defined(_WIN64)
-typedef union { __m64 data; } long1;
-typedef union { __m128i data; } long2;
-typedef union { __m128i data[2]; } long4;
-typedef union { long4 data; } long3;
-typedef union { __m128i data[4]; } long8;
-typedef union { __m128i data[8]; } long16;
-
-typedef union { __m64 data; } ulong1;
-typedef union { __m128i data; } ulong2;
-typedef union { __m128i data[2]; } ulong4;
-typedef union { ulong4 data; } ulong3;
-typedef union { __m128i data[4]; } ulong8;
-typedef union { __m128i data[8]; } ulong16;
-#endif // defined(_WIN64)
-
-typedef union { __m64 data; } longlong1;
-typedef union { __m128i data; } longlong2;
-typedef union { __m128i data[2]; } longlong4;
-typedef union { longlong4 data; } longlong3;
-typedef union { __m128i data[4]; } longlong8;
-typedef union { __m128i data[8]; } longlong16;
-
-typedef union { __m64 data; } ulonglong1;
-typedef union { __m128i data; } ulonglong2;
-typedef union { __m128i data[2]; } ulonglong4;
-typedef union { ulonglong4 data; } ulonglong3;
-typedef union { __m128i data[4]; } ulonglong8;
-typedef union { __m128i data[8]; } ulonglong16;
-
-typedef union { float data; } float1;
-typedef union { __m64 data; } float2;
-typedef union { __m128 data; } float4;
-typedef union { float4 data; } float3;
-typedef union { __m256 data; } float8;
-typedef union { __m256 data[2]; } float16;
-
-typedef union { double data; } double1;
-typedef union { __m128d data; } double2;
-typedef union { __m256d data; } double4;
-typedef union { double4 data; } double3;
-typedef union { __m256d data[2]; } double8;
-typedef union { __m256d data[4]; } double16;
-
-#else // !defined(_MSC_VER)
-
-typedef union { char data; } char1;
-typedef union { char data[2]; } char2;
-typedef union { char data[4]; } char4;
-typedef union { char data[8]; } char8;
-typedef union { char data[16]; } char16;
-typedef union { char4 data; } char3;
-
-typedef union { unsigned char data; } uchar1;
-typedef union { unsigned char data[2]; } uchar2;
-typedef union { unsigned char data[4]; } uchar4;
-typedef union { unsigned char data[8]; } uchar8;
-typedef union { unsigned char data[16]; } uchar16;
-typedef union { uchar4 data; } uchar3;
-
-typedef union { short data; } short1;
-typedef union { short data[2]; } short2;
-typedef union { short data[4]; } short4;
-typedef union { short data[8]; } short8;
-typedef union { short data[16]; } short16;
-typedef union { short4 data; } short3;
-
-typedef union { unsigned short data; } ushort1;
-typedef union { unsigned short data[2]; } ushort2;
-typedef union { unsigned short data[4]; } ushort4;
-typedef union { unsigned short data[8]; } ushort8;
-typedef union { unsigned short data[16]; } ushort16;
-typedef union { ushort4 data; } ushort3;
-
-typedef union { int data; } int1;
-typedef union { int data[2]; } int2;
-typedef union { int data[4]; } int4;
-typedef union { int data[8]; } int8;
-typedef union { int data[16]; } int16;
-typedef union { int4 data; } int3;
-
-typedef union { unsigned int data; } uint1;
-typedef union { unsigned int data[2]; } uint2;
-typedef union { unsigned int data[4]; } uint4;
-typedef union { unsigned int data[8]; } uint8;
-typedef union { unsigned int data[16]; } uint16;
-typedef union { uint4 data; } uint3;
-
-typedef union { long data; } long1;
-typedef union { long data[2]; } long2;
-typedef union { long data[4]; } long4;
-typedef union { long data[8]; } long8;
-typedef union { long data[16]; } long16;
-typedef union { long4 data; } long3;
-
-typedef union { unsigned long data; } ulong1;
-typedef union { unsigned long data[2]; } ulong2;
-typedef union { unsigned long data[4]; } ulong4;
-typedef union { unsigned long data[8]; } ulong8;
-typedef union { unsigned long data[16]; } ulong16;
-typedef union { ulong4 data; } ulong3;
-
-typedef union { long long data; } longlong1;
-typedef union { long long data[2]; } longlong2;
-typedef union { long long data[4]; } longlong4;
-typedef union { long long data[8]; } longlong8;
-typedef union { long long data[16]; } longlong16;
-typedef union { longlong4 data; } longlong3;
-
-typedef union { unsigned long long data; } ulonglong1;
-typedef union { unsigned long long data[2]; } ulonglong2;
-typedef union { unsigned long long data[4]; } ulonglong4;
-typedef union { unsigned long long data[8]; } ulonglong8;
-typedef union { unsigned long long data[16]; } ulonglong16;
-typedef union { ulonglong4 data; } ulonglong3;
-
-typedef union { float data; } float1;
-typedef union { float data[2]; } float2;
-typedef union { float data[4]; } float4;
-typedef union { float data[8]; } float8;
-typedef union { float data[16]; } float16;
-typedef union { float4 data; } float3;
-
-typedef union { double data; } double1;
-typedef union { double data[2]; } double2;
-typedef union { double data[4]; } double4;
-typedef union { double data[8]; } double8;
-typedef union { double data[16]; } double16;
-typedef union { double4 data; } double3;
-
-#endif // defined(_MSC_VER)
-#endif // defined(__has_attribute)
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/hiprtc.h b/third_party/rocm/include/hip/hcc_detail/hiprtc.h
deleted file mode 100644
index fecea75..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hiprtc.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#ifndef HIPRTC_H
-#define HIPRTC_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#include <stdlib.h>
-
-#if !defined(_WIN32)
-#pragma GCC visibility push (default)
-#endif
-
-enum hiprtcResult {
- HIPRTC_SUCCESS = 0,
- HIPRTC_ERROR_OUT_OF_MEMORY = 1,
- HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
- HIPRTC_ERROR_INVALID_INPUT = 3,
- HIPRTC_ERROR_INVALID_PROGRAM = 4,
- HIPRTC_ERROR_INVALID_OPTION = 5,
- HIPRTC_ERROR_COMPILATION = 6,
- HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
- HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
- HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
- HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
- HIPRTC_ERROR_INTERNAL_ERROR = 11
-};
-
-const char* hiprtcGetErrorString(hiprtcResult result);
-
-
-hiprtcResult hiprtcVersion(int* major, int* minor);
-
-typedef struct _hiprtcProgram* hiprtcProgram;
-
-hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog,
- const char* name_expression);
-
-hiprtcResult hiprtcCompileProgram(hiprtcProgram prog,
- int numOptions,
- const char** options);
-
-hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog,
- const char* src,
- const char* name,
- int numHeaders,
- const char** headers,
- const char** includeNames);
-
-hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog);
-
-hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog,
- const char* name_expression,
- const char** lowered_name);
-
-hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* log);
-
-hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog,
- size_t* logSizeRet);
-
-hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code);
-
-hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet);
-
-#if !defined(_WIN32)
-#pragma GCC visibility pop
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif //HIPRTC_H
diff --git a/third_party/rocm/include/hip/hcc_detail/host_defines.h b/third_party/rocm/include/hip/hcc_detail/host_defines.h
deleted file mode 100644
index 72f3932..0000000
--- a/third_party/rocm/include/hip/hcc_detail/host_defines.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/host_defines.h
- * @brief TODO-doc
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H
-
-
-// Add guard to Generic Grid Launch method
-#ifndef GENERIC_GRID_LAUNCH
-#define GENERIC_GRID_LAUNCH 1
-#endif
-
-#ifdef __HCC__
-/**
- * Function and kernel markers
- */
-#define __host__ __attribute__((cpu))
-#define __device__ __attribute__((hc))
-
-#if GENERIC_GRID_LAUNCH == 0
-#define __global__ __attribute__((hc_grid_launch)) __attribute__((used))
-#else
-#if __hcc_workweek__ >= 17481
-#define __global__ __attribute__((annotate("__HIP_global_function__"), cpu, hc, used))
-#else
-#define __global__ __attribute__((hc, used))
-#endif
-#endif // GENERIC_GRID_LAUNCH
-
-#define __noinline__ __attribute__((noinline))
-#define __forceinline__ inline __attribute__((always_inline))
-
-
-/*
- * Variable Type Qualifiers:
- */
-// _restrict is supported by the compiler
-#define __shared__ tile_static
-#define __constant__ __attribute__((hc, annotate("__HIP_constant__")))
-
-#elif defined(__clang__) && defined(__HIP__)
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#define __host__ __attribute__((host))
-#define __device__ __attribute__((device))
-#define __global__ __attribute__((global))
-#define __shared__ __attribute__((shared))
-#define __constant__ __attribute__((constant))
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-#define __noinline__ __attribute__((noinline))
-#define __forceinline__ inline __attribute__((always_inline))
-
-#else
-
-// Non-HCC compiler
-/**
- * Function and kernel markers
- */
-#define __host__
-#define __device__
-
-#define __global__
-
-#define __noinline__
-#define __forceinline__ inline
-
-#define __shared__
-#define __constant__
-
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/hsa_helpers.hpp b/third_party/rocm/include/hip/hcc_detail/hsa_helpers.hpp
deleted file mode 100644
index af4f0c9..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hsa_helpers.hpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#pragma once
-
-#include <hsa/hsa.h>
-
-#include <cstdint>
-#include <functional>
-#include <string>
-
-namespace hip_impl {
-inline void* address(hsa_executable_symbol_t x) {
- void* r = nullptr;
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r);
-
- return r;
-}
-
-inline hsa_agent_t agent(hsa_executable_symbol_t x) {
- hsa_agent_t r = {};
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &r);
-
- return r;
-}
-
-inline std::uint32_t group_size(hsa_executable_symbol_t x) {
- std::uint32_t r = 0u;
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &r);
-
- return r;
-}
-
-inline hsa_isa_t isa(hsa_agent_t x) {
- hsa_isa_t r = {};
- hsa_agent_iterate_isas(x,
- [](hsa_isa_t i, void* o) {
- *static_cast<hsa_isa_t*>(o) = i; // Pick the first.
-
- return HSA_STATUS_INFO_BREAK;
- },
- &r);
-
- return r;
-}
-
-inline std::uint64_t kernel_object(hsa_executable_symbol_t x) {
- std::uint64_t r = 0u;
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &r);
-
- return r;
-}
-
-inline std::string name(hsa_executable_symbol_t x) {
- std::uint32_t sz = 0u;
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz);
-
- std::string r(sz, '\0');
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front());
-
- return r;
-}
-
-inline std::uint32_t private_size(hsa_executable_symbol_t x) {
- std::uint32_t r = 0u;
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &r);
-
- return r;
-}
-
-inline std::uint32_t size(hsa_executable_symbol_t x) {
- std::uint32_t r = 0;
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r);
-
- return r;
-}
-
-inline hsa_symbol_kind_t type(hsa_executable_symbol_t x) {
- hsa_symbol_kind_t r = {};
- hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &r);
-
- return r;
-}
-} // namespace hip_impl
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/hcc_detail/library_types.h b/third_party/rocm/include/hip/hcc_detail/library_types.h
deleted file mode 100644
index 6fcd0dc..0000000
--- a/third_party/rocm/include/hip/hcc_detail/library_types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LIBRARY_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_LIBRARY_TYPES_H
-
-typedef enum hipDataType {
- HIP_R_16F = 2,
- HIP_R_32F = 0,
- HIP_R_64F = 1,
- HIP_C_16F = 6,
- HIP_C_32F = 4,
- HIP_C_64F = 5
-} hipDataType;
-
-typedef enum hipLibraryPropertyType {
- HIP_LIBRARY_MAJOR_VERSION,
- HIP_LIBRARY_MINOR_VERSION,
- HIP_LIBRARY_PATCH_LEVEL
-} hipLibraryPropertyType;
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/llvm_intrinsics.h b/third_party/rocm/include/hip/hcc_detail/llvm_intrinsics.h
deleted file mode 100644
index 330b3d9..0000000
--- a/third_party/rocm/include/hip/hcc_detail/llvm_intrinsics.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hcc_detail/llvm_intrinsics.h
- * @brief Contains declarations for wrapper functions for llvm intrinsics
- * like llvm.amdgcn.s.barrier.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H
-
-#include "hip/hcc_detail/host_defines.h"
-
-// FIXME: These should all be removed and proper builtins used.
-__device__
-unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize");
-
-__device__
-int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle");
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/macro_based_grid_launch.hpp b/third_party/rocm/include/hip/hcc_detail/macro_based_grid_launch.hpp
deleted file mode 100644
index 96d449b..0000000
--- a/third_party/rocm/include/hip/hcc_detail/macro_based_grid_launch.hpp
+++ /dev/null
@@ -1,798 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "concepts.hpp"
-#include "helpers.hpp"
-
-#include "hc.hpp"
-#include "hip/hip_ext.h"
-#include "hip_runtime.h"
-
-#include <functional>
-#include <iostream>
-#include <stdexcept>
-#include <type_traits>
-#include <utility>
-
-namespace hip_impl {
-namespace {
-struct New_grid_launch_tag {};
-struct Old_grid_launch_tag {};
-
-template <typename C, typename D>
-class RAII_guard {
- D dtor_;
-
- public:
- RAII_guard() = default;
-
- RAII_guard(const C& ctor, D dtor) : dtor_{std::move(dtor)} { ctor(); }
-
- RAII_guard(const RAII_guard&) = default;
- RAII_guard(RAII_guard&&) = default;
-
- RAII_guard& operator=(const RAII_guard&) = default;
- RAII_guard& operator=(RAII_guard&&) = default;
-
- ~RAII_guard() { dtor_(); }
-};
-
-template <typename C, typename D>
-RAII_guard<C, D> make_RAII_guard(const C& ctor, D dtor) {
- return RAII_guard<C, D>{ctor, std::move(dtor)};
-}
-
-template <FunctionalProcedure F, typename... Ts>
-using is_new_grid_launch_t = typename std::conditional<is_callable<F(Ts...)>{}, New_grid_launch_tag,
- Old_grid_launch_tag>::type;
-} // namespace
-
-// TODO: - dispatch rank should be derived from the domain dimensions passed
-// in, and not always assumed to be 3;
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> ==
- {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, dim3 num_blocks,
- dim3 dim_blocks, int group_mem_bytes,
- const hc::accelerator_view& acc_v, K k) {
- const auto d =
- hc::extent<3>{num_blocks.z * dim_blocks.z, num_blocks.y * dim_blocks.y,
- num_blocks.x * dim_blocks.x}
- .tile_with_dynamic(dim_blocks.z, dim_blocks.y, dim_blocks.x, group_mem_bytes);
-
- try {
- hc::parallel_for_each(acc_v, d, k);
- } catch (std::exception& ex) {
- std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl;
- hip_throw(ex);
- }
-}
-
-// TODO: these are workarounds, they should be removed.
-
-hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&);
-void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t);
-void unlock_stream_hip_(hipStream_t, void*, const char*, hc::accelerator_view*);
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag,
- dim3 num_blocks, dim3 dim_blocks,
- int group_mem_bytes,
- hipStream_t stream,
- const char* kernel_name, K k) {
- void* lck_stream = nullptr;
- auto acc_v = lock_stream_hip_(stream, lck_stream);
- auto stream_guard =
- make_RAII_guard(std::bind(print_prelaunch_trace_, kernel_name, num_blocks, dim_blocks,
- group_mem_bytes, stream),
- std::bind(unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v));
-
- try {
- grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks),
- group_mem_bytes, acc_v, std::move(k));
- } catch (std::exception& ex) {
- std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl;
- hip_throw(ex);
- }
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> ==
- {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_(Old_grid_launch_tag,
- dim3 num_blocks, dim3 dim_blocks,
- int group_mem_bytes,
- hipStream_t stream, K k) {
- grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks),
- group_mem_bytes, std::move(stream), std::move(k));
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_(
- Old_grid_launch_tag, dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, hipStream_t stream,
- const char* kernel_name, K k) {
- grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks),
- group_mem_bytes, std::move(stream), kernel_name, std::move(k));
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {Ts...}) inline std::enable_if_t<
- !std::is_function<K>::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks,
- int group_mem_bytes, hipStream_t stream,
- const char* kernel_name, K k) {
- grid_launch_hip_impl_(is_new_grid_launch_t<K, Ts...>{}, std::move(num_blocks),
- std::move(dim_blocks), group_mem_bytes, std::move(stream), kernel_name,
- std::move(k));
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {Ts...}) inline std::enable_if_t<
- !std::is_function<K>::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks,
- int group_mem_bytes, hipStream_t stream, K k) {
- grid_launch_hip_impl_(is_new_grid_launch_t<K, Ts...>{}, std::move(num_blocks),
- std::move(dim_blocks), group_mem_bytes, std::move(stream), std::move(k));
-}
-
-// TODO: these are temporary and purposefully noisy and disruptive.
-#define make_kernel_name_hip(k, n) \
- HIP_kernel_functor_name_begin##_##k##_##HIP_kernel_functor_name_end##_##n
-
-#define make_kernel_functor_hip_30(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
- p22, p23, p24, p25, p26, p27) \
- struct make_kernel_name_hip(function_name, 28) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- std::decay_t<decltype(p21)> _p21_; \
- std::decay_t<decltype(p22)> _p22_; \
- std::decay_t<decltype(p23)> _p23_; \
- std::decay_t<decltype(p24)> _p24_; \
- std::decay_t<decltype(p25)> _p25_; \
- std::decay_t<decltype(p26)> _p26_; \
- std::decay_t<decltype(p27)> _p27_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \
- _p22_, _p23_, _p24_, _p25_, _p26_, _p27_); \
- } \
- }
-#define make_kernel_functor_hip_29(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
- p22, p23, p24, p25, p26) \
- struct make_kernel_name_hip(function_name, 27) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- std::decay_t<decltype(p21)> _p21_; \
- std::decay_t<decltype(p22)> _p22_; \
- std::decay_t<decltype(p23)> _p23_; \
- std::decay_t<decltype(p24)> _p24_; \
- std::decay_t<decltype(p25)> _p25_; \
- std::decay_t<decltype(p26)> _p26_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \
- _p22_, _p23_, _p24_, _p25_, _p26_); \
- } \
- }
-#define make_kernel_functor_hip_28(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
- p22, p23, p24, p25) \
- struct make_kernel_name_hip(function_name, 26) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- std::decay_t<decltype(p21)> _p21_; \
- std::decay_t<decltype(p22)> _p22_; \
- std::decay_t<decltype(p23)> _p23_; \
- std::decay_t<decltype(p24)> _p24_; \
- std::decay_t<decltype(p25)> _p25_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \
- _p22_, _p23_, _p24_, _p25_); \
- } \
- }
-#define make_kernel_functor_hip_27(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
- p22, p23, p24) \
- struct make_kernel_name_hip(function_name, 25) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- std::decay_t<decltype(p21)> _p21_; \
- std::decay_t<decltype(p22)> _p22_; \
- std::decay_t<decltype(p23)> _p23_; \
- std::decay_t<decltype(p24)> _p24_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \
- _p22_, _p23_, _p24_); \
- } \
- }
-#define make_kernel_functor_hip_26(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
- p22, p23) \
- struct make_kernel_name_hip(function_name, 24) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- std::decay_t<decltype(p21)> _p21_; \
- std::decay_t<decltype(p22)> _p22_; \
- std::decay_t<decltype(p23)> _p23_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \
- _p22_, _p23_); \
- } \
- }
-#define make_kernel_functor_hip_25(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
- p22) \
- struct make_kernel_name_hip(function_name, 23) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- std::decay_t<decltype(p21)> _p21_; \
- std::decay_t<decltype(p22)> _p22_; \
- __attribute__((used, flatten)) void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \
- _p22_); \
- } \
- }
-#define make_kernel_functor_hip_24(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21) \
- struct make_kernel_name_hip(function_name, 22) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- std::decay_t<decltype(p21)> _p21_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_); \
- } \
- }
-#define make_kernel_functor_hip_23(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20) \
- struct make_kernel_name_hip(function_name, 21) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- std::decay_t<decltype(p20)> _p20_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_); \
- } \
- }
-#define make_kernel_functor_hip_22(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19) \
- struct make_kernel_name_hip(function_name, 20) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- std::decay_t<decltype(p19)> _p19_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_); \
- } \
- }
-#define make_kernel_functor_hip_21(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17, p18) \
- struct make_kernel_name_hip(function_name, 19) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- std::decay_t<decltype(p18)> _p18_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_); \
- } \
- }
-#define make_kernel_functor_hip_20(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16, p17) \
- struct make_kernel_name_hip(function_name, 18) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- std::decay_t<decltype(p17)> _p17_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_, _p17_); \
- } \
- }
-#define make_kernel_functor_hip_19(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15, p16) \
- struct make_kernel_name_hip(function_name, 17) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- std::decay_t<decltype(p16)> _p16_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_, _p16_); \
- } \
- }
-#define make_kernel_functor_hip_18(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14, p15) \
- struct make_kernel_name_hip(function_name, 16) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- std::decay_t<decltype(p15)> _p15_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_, _p15_); \
- } \
- }
-#define make_kernel_functor_hip_17(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13, p14) \
- struct make_kernel_name_hip(function_name, 15) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- std::decay_t<decltype(p14)> _p14_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_, _p14_); \
- } \
- }
-#define make_kernel_functor_hip_16(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12, p13) \
- struct make_kernel_name_hip(function_name, 14) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- std::decay_t<decltype(p13)> _p13_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_, _p13_); \
- } \
- }
-#define make_kernel_functor_hip_15(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11, p12) \
- struct make_kernel_name_hip(function_name, 13) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- std::decay_t<decltype(p12)> _p12_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \
- _p12_); \
- } \
- }
-#define make_kernel_functor_hip_14(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10, p11) \
- struct make_kernel_name_hip(function_name, 12) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- std::decay_t<decltype(p11)> _p11_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_); \
- } \
- }
-#define make_kernel_functor_hip_13(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9, p10) \
- struct make_kernel_name_hip(function_name, 11) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- std::decay_t<decltype(p10)> _p10_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { \
- kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_); \
- } \
- }
-#define make_kernel_functor_hip_12(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
- p9) \
- struct make_kernel_name_hip(function_name, 10) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- std::decay_t<decltype(p9)> _p9_; \
- void operator()(const hc::tiled_index<3>&) const \
- [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_); } \
- }
-#define make_kernel_functor_hip_11(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
- struct make_kernel_name_hip(function_name, 9) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- std::decay_t<decltype(p8)> _p8_; \
- void operator()(const hc::tiled_index<3>&) const \
- [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_); } \
- }
-#define make_kernel_functor_hip_10(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7) \
- struct make_kernel_name_hip(function_name, 8) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- std::decay_t<decltype(p7)> _p7_; \
- void operator()(const hc::tiled_index<3>&) const \
- [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_); } \
- }
-#define make_kernel_functor_hip_9(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6) \
- struct make_kernel_name_hip(function_name, 7) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- std::decay_t<decltype(p6)> _p6_; \
- void operator()(const hc::tiled_index<3>&) const \
- [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_); } \
- }
-#define make_kernel_functor_hip_8(function_name, kernel_name, p0, p1, p2, p3, p4, p5) \
- struct make_kernel_name_hip(function_name, 6) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- std::decay_t<decltype(p5)> _p5_; \
- void operator()(const hc::tiled_index<3>&) const \
- [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_); } \
- }
-#define make_kernel_functor_hip_7(function_name, kernel_name, p0, p1, p2, p3, p4) \
- struct make_kernel_name_hip(function_name, 5) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- std::decay_t<decltype(p4)> _p4_; \
- void operator()(const hc::tiled_index<3>&) const \
- [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_); } \
- }
-#define make_kernel_functor_hip_6(function_name, kernel_name, p0, p1, p2, p3) \
- struct make_kernel_name_hip(function_name, 4) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- std::decay_t<decltype(p3)> _p3_; \
- void operator()(const hc::tiled_index<3>&) const \
- [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_); } \
- }
-#define make_kernel_functor_hip_5(function_name, kernel_name, p0, p1, p2) \
- struct make_kernel_name_hip(function_name, 3) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- std::decay_t<decltype(p2)> _p2_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_, _p2_); } \
- }
-#define make_kernel_functor_hip_4(function_name, kernel_name, p0, p1) \
- struct make_kernel_name_hip(function_name, 2) { \
- std::decay_t<decltype(p0)> _p0_; \
- std::decay_t<decltype(p1)> _p1_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_); } \
- }
-#define fofo(f, n) kernel_prefix_hip##f##kernel_suffix_hip##n
-#define make_kernel_functor_hip_3(function_name, kernel_name, p0) \
- struct make_kernel_name_hip(function_name, 1) { \
- std::decay_t<decltype(p0)> _p0_; \
- void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_); } \
- }
-#define make_kernel_functor_hip_2(function_name, kernel_name) \
- struct make_kernel_name_hip(function_name, 0) { \
- void operator()(const hc::tiled_index<3>&)[[hc]] { return kernel_name(hipLaunchParm{}); } \
- }
-#define make_kernel_functor_hip_1(...)
-#define make_kernel_functor_hip_0(...)
-#define make_kernel_functor_hip_(...) overload_macro_hip_(make_kernel_functor_hip_, __VA_ARGS__)
-
-
-#define hipLaunchNamedKernelGGL(function_name, kernel_name, num_blocks, dim_blocks, \
- group_mem_bytes, stream, ...) \
- do { \
- make_kernel_functor_hip_(function_name, kernel_name, __VA_ARGS__) \
- hip_kernel_functor_impl_{__VA_ARGS__}; \
- hip_impl::grid_launch_hip_(num_blocks, dim_blocks, group_mem_bytes, stream, #kernel_name, \
- hip_kernel_functor_impl_); \
- } while (0)
-
-#define hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) \
- do { \
- hipLaunchNamedKernelGGL(unnamed, kernel_name, num_blocks, dim_blocks, group_mem_bytes, \
- stream, ##__VA_ARGS__); \
- } while (0)
-
-#define hipLaunchKernel(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) \
- do { \
- hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, \
- hipLaunchParm{}, ##__VA_ARGS__); \
- } while (0)
-} // namespace hip_impl
diff --git a/third_party/rocm/include/hip/hcc_detail/math_functions.h b/third_party/rocm/include/hip/hcc_detail/math_functions.h
deleted file mode 100644
index 3dbc9a2..0000000
--- a/third_party/rocm/include/hip/hcc_detail/math_functions.h
+++ /dev/null
@@ -1,1557 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "hip_fp16_math_fwd.h"
-#include "hip_vector_types.h"
-#include "math_fwd.h"
-
-#include <hip/hcc_detail/host_defines.h>
-
-#include <algorithm>
-
-// assert.h is only for the host version of assert.
-// The device version of assert is implemented in hip/hcc_detail/hip_runtime.h.
-// Users should include hip_runtime.h for the device version of assert.
-#if !__HIP_DEVICE_COMPILE__
-#include <assert.h>
-#endif
-
-#include <limits.h>
-#include <limits>
-#include <stdint.h>
-
-// HCC's own math functions should be included first, otherwise there will
-// be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
-#ifdef __HCC__
-#include "kalmar_math.h"
-#endif
-
-#if _LIBCPP_VERSION && __HIP__
-namespace std {
-template <>
-struct __numeric_type<_Float16>
-{
- static _Float16 __test(_Float16);
-
- typedef _Float16 type;
- static const bool value = true;
-};
-}
-#endif // _LIBCPP_VERSION
-
-#pragma push_macro("__DEVICE__")
-#pragma push_macro("__RETURN_TYPE")
-
-#ifdef __HCC__
-#define __DEVICE__ __device__
-#define __RETURN_TYPE int
-#else // to be consistent with __clang_cuda_math_forward_declares
-#define __DEVICE__ static __device__
-#define __RETURN_TYPE bool
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-__DEVICE__
-inline
-uint64_t __make_mantissa_base8(const char* tagp)
-{
- uint64_t r = 0;
- while (tagp) {
- char tmp = *tagp;
-
- if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
- else return 0;
-
- ++tagp;
- }
-
- return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa_base10(const char* tagp)
-{
- uint64_t r = 0;
- while (tagp) {
- char tmp = *tagp;
-
- if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
- else return 0;
-
- ++tagp;
- }
-
- return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa_base16(const char* tagp)
-{
- uint64_t r = 0;
- while (tagp) {
- char tmp = *tagp;
-
- if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
- else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
- else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
- else return 0;
-
- ++tagp;
- }
-
- return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa(const char* tagp)
-{
- if (!tagp) return 0u;
-
- if (*tagp == '0') {
- ++tagp;
-
- if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
- else return __make_mantissa_base8(tagp);
- }
-
- return __make_mantissa_base10(tagp);
-}
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-// DOT FUNCTIONS
-#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
-__DEVICE__
-inline
-int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
- return __ockl_sdot2(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
- return __ockl_udot2(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
- return __ockl_sdot4(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
- return __ockl_udot4(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-int amd_mixed_dot(int a, int b, int c, bool saturate) {
- return __ockl_sdot8(a, b, c, saturate);
-}
-__DEVICE__
-inline
-uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
- return __ockl_udot8(a, b, c, saturate);
-}
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-// BEGIN FLOAT
-__DEVICE__
-inline
-float abs(float x) { return __ocml_fabs_f32(x); }
-__DEVICE__
-inline
-float acosf(float x) { return __ocml_acos_f32(x); }
-__DEVICE__
-inline
-float acoshf(float x) { return __ocml_acosh_f32(x); }
-__DEVICE__
-inline
-float asinf(float x) { return __ocml_asin_f32(x); }
-__DEVICE__
-inline
-float asinhf(float x) { return __ocml_asinh_f32(x); }
-__DEVICE__
-inline
-float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
-__DEVICE__
-inline
-float atanf(float x) { return __ocml_atan_f32(x); }
-__DEVICE__
-inline
-float atanhf(float x) { return __ocml_atanh_f32(x); }
-__DEVICE__
-inline
-float cbrtf(float x) { return __ocml_cbrt_f32(x); }
-__DEVICE__
-inline
-float ceilf(float x) { return __ocml_ceil_f32(x); }
-__DEVICE__
-inline
-float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
-__DEVICE__
-inline
-float cosf(float x) { return __ocml_cos_f32(x); }
-__DEVICE__
-inline
-float coshf(float x) { return __ocml_cosh_f32(x); }
-__DEVICE__
-inline
-float cospif(float x) { return __ocml_cospi_f32(x); }
-__DEVICE__
-inline
-float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
-__DEVICE__
-inline
-float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
-__DEVICE__
-inline
-float erfcf(float x) { return __ocml_erfc_f32(x); }
-__DEVICE__
-inline
-float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
-__DEVICE__
-inline
-float erfcxf(float x) { return __ocml_erfcx_f32(x); }
-__DEVICE__
-inline
-float erff(float x) { return __ocml_erf_f32(x); }
-__DEVICE__
-inline
-float erfinvf(float x) { return __ocml_erfinv_f32(x); }
-__DEVICE__
-inline
-float exp10f(float x) { return __ocml_exp10_f32(x); }
-__DEVICE__
-inline
-float exp2f(float x) { return __ocml_exp2_f32(x); }
-__DEVICE__
-inline
-float expf(float x) { return __ocml_exp_f32(x); }
-__DEVICE__
-inline
-float expm1f(float x) { return __ocml_expm1_f32(x); }
-__DEVICE__
-inline
-float fabsf(float x) { return __ocml_fabs_f32(x); }
-__DEVICE__
-inline
-float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
-__DEVICE__
-inline
-float fdividef(float x, float y) { return x / y; }
-__DEVICE__
-inline
-float floorf(float x) { return __ocml_floor_f32(x); }
-__DEVICE__
-inline
-float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
-__DEVICE__
-inline
-float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
-__DEVICE__
-inline
-float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
-__DEVICE__
-inline
-float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
-__DEVICE__
-inline
-float frexpf(float x, int* nptr)
-{
- int tmp;
- float r =
- __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
- *nptr = tmp;
-
- return r;
-}
-__DEVICE__
-inline
-float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
-__DEVICE__
-inline
-int ilogbf(float x) { return __ocml_ilogb_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
-__DEVICE__
-inline
-float j0f(float x) { return __ocml_j0_f32(x); }
-__DEVICE__
-inline
-float j1f(float x) { return __ocml_j1_f32(x); }
-__DEVICE__
-inline
-float jnf(int n, float x)
-{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
- // for linear recurrences to get O(log n) steps, but it's unclear if
- // it'd be beneficial in this case.
- if (n == 0) return j0f(x);
- if (n == 1) return j1f(x);
-
- float x0 = j0f(x);
- float x1 = j1f(x);
- for (int i = 1; i < n; ++i) {
- float x2 = (2 * i) / x * x1 - x0;
- x0 = x1;
- x1 = x2;
- }
-
- return x1;
-}
-__DEVICE__
-inline
-float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
-__DEVICE__
-inline
-float lgammaf(float x) { return __ocml_lgamma_f32(x); }
-__DEVICE__
-inline
-long long int llrintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-long long int llroundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float log10f(float x) { return __ocml_log10_f32(x); }
-__DEVICE__
-inline
-float log1pf(float x) { return __ocml_log1p_f32(x); }
-__DEVICE__
-inline
-float log2f(float x) { return __ocml_log2_f32(x); }
-__DEVICE__
-inline
-float logbf(float x) { return __ocml_logb_f32(x); }
-__DEVICE__
-inline
-float logf(float x) { return __ocml_log_f32(x); }
-__DEVICE__
-inline
-long int lrintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-long int lroundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float modff(float x, float* iptr)
-{
- float tmp;
- float r =
- __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
- *iptr = tmp;
-
- return r;
-}
-__DEVICE__
-inline
-float nanf(const char* tagp)
-{
- union {
- float val;
- struct ieee_float {
- uint32_t mantissa : 22;
- uint32_t quiet : 1;
- uint32_t exponent : 8;
- uint32_t sign : 1;
- } bits;
-
- static_assert(sizeof(float) == sizeof(ieee_float), "");
- } tmp;
-
- tmp.bits.sign = 0u;
- tmp.bits.exponent = ~0u;
- tmp.bits.quiet = 1u;
- tmp.bits.mantissa = __make_mantissa(tagp);
-
- return tmp.val;
-}
-__DEVICE__
-inline
-float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
-__DEVICE__
-inline
-float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
-__DEVICE__
-inline
-float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
-__DEVICE__
-inline
-float norm4df(float x, float y, float z, float w)
-{
- return __ocml_len4_f32(x, y, z, w);
-}
-__DEVICE__
-inline
-float normcdff(float x) { return __ocml_ncdf_f32(x); }
-__DEVICE__
-inline
-float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
-__DEVICE__
-inline
-float normf(int dim, const float* a)
-{ // TODO: placeholder until OCML adds support.
- float r = 0;
- while (dim--) { r += a[0] * a[0]; ++a; }
-
- return __ocml_sqrt_f32(r);
-}
-__DEVICE__
-inline
-float powf(float x, float y) { return __ocml_pow_f32(x, y); }
-__DEVICE__
-inline
-float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); }
-__DEVICE__
-inline
-float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
-__DEVICE__
-inline
-float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
-__DEVICE__
-inline
-float remquof(float x, float y, int* quo)
-{
- int tmp;
- float r =
- __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
- *quo = tmp;
-
- return r;
-}
-__DEVICE__
-inline
-float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
-__DEVICE__
-inline
-float rintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-float rnorm3df(float x, float y, float z)
-{
- return __ocml_rlen3_f32(x, y, z);
-}
-
-__DEVICE__
-inline
-float rnorm4df(float x, float y, float z, float w)
-{
- return __ocml_rlen4_f32(x, y, z, w);
-}
-__DEVICE__
-inline
-float rnormf(int dim, const float* a)
-{ // TODO: placeholder until OCML adds support.
- float r = 0;
- while (dim--) { r += a[0] * a[0]; ++a; }
-
- return __ocml_rsqrt_f32(r);
-}
-__DEVICE__
-inline
-float roundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
-__DEVICE__
-inline
-float scalblnf(float x, long int n)
-{
- return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
-}
-__DEVICE__
-inline
-float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
-__DEVICE__
-inline
-__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
-__DEVICE__
-inline
-void sincosf(float x, float* sptr, float* cptr)
-{
- float tmp;
-
- *sptr =
- __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
- *cptr = tmp;
-}
-__DEVICE__
-inline
-void sincospif(float x, float* sptr, float* cptr)
-{
- float tmp;
-
- *sptr =
- __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
- *cptr = tmp;
-}
-__DEVICE__
-inline
-float sinf(float x) { return __ocml_sin_f32(x); }
-__DEVICE__
-inline
-float sinhf(float x) { return __ocml_sinh_f32(x); }
-__DEVICE__
-inline
-float sinpif(float x) { return __ocml_sinpi_f32(x); }
-__DEVICE__
-inline
-float sqrtf(float x) { return __ocml_sqrt_f32(x); }
-__DEVICE__
-inline
-float tanf(float x) { return __ocml_tan_f32(x); }
-__DEVICE__
-inline
-float tanhf(float x) { return __ocml_tanh_f32(x); }
-__DEVICE__
-inline
-float tgammaf(float x) { return __ocml_tgamma_f32(x); }
-__DEVICE__
-inline
-float truncf(float x) { return __ocml_trunc_f32(x); }
-__DEVICE__
-inline
-float y0f(float x) { return __ocml_y0_f32(x); }
-__DEVICE__
-inline
-float y1f(float x) { return __ocml_y1_f32(x); }
-__DEVICE__
-inline
-float ynf(int n, float x)
-{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
- // for linear recurrences to get O(log n) steps, but it's unclear if
- // it'd be beneficial in this case. Placeholder until OCML adds
- // support.
- if (n == 0) return y0f(x);
- if (n == 1) return y1f(x);
-
- float x0 = y0f(x);
- float x1 = y1f(x);
- for (int i = 1; i < n; ++i) {
- float x2 = (2 * i) / x * x1 - x0;
- x0 = x1;
- x1 = x2;
- }
-
- return x1;
-}
-
-// BEGIN INTRINSICS
-__DEVICE__
-inline
-float __cosf(float x) { return __ocml_native_cos_f32(x); }
-__DEVICE__
-inline
-float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
-__DEVICE__
-inline
-float __expf(float x) { return __ocml_native_exp_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fadd_rn(float x, float y) { return x + y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
-__DEVICE__
-inline
-float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fdiv_rn(float x, float y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fdividef(float x, float y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmaf_rd(float x, float y, float z)
-{
- return __ocml_fma_rtn_f32(x, y, z);
-}
-#endif
-__DEVICE__
-inline
-float __fmaf_rn(float x, float y, float z)
-{
- return __ocml_fma_f32(x, y, z);
-}
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmaf_ru(float x, float y, float z)
-{
- return __ocml_fma_rtp_f32(x, y, z);
-}
-__DEVICE__
-inline
-float __fmaf_rz(float x, float y, float z)
-{
- return __ocml_fma_rtz_f32(x, y, z);
-}
-__DEVICE__
-inline
-float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fmul_rn(float x, float y) { return x * y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
-__DEVICE__
-inline
-float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
-#endif
-__DEVICE__
-inline
-float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
-__DEVICE__
-inline
-float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
-#endif
-__DEVICE__
-inline
-float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
-#endif
-__DEVICE__
-inline
-float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
-__DEVICE__
-inline
-float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
-__DEVICE__
-inline
-float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fsub_rn(float x, float y) { return x - y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __log10f(float x) { return __ocml_native_log10_f32(x); }
-__DEVICE__
-inline
-float __log2f(float x) { return __ocml_native_log2_f32(x); }
-__DEVICE__
-inline
-float __logf(float x) { return __ocml_native_log_f32(x); }
-__DEVICE__
-inline
-float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
-__DEVICE__
-inline
-float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
-__DEVICE__
-inline
-void __sincosf(float x, float* sptr, float* cptr)
-{
- *sptr = __ocml_native_sin_f32(x);
- *cptr = __ocml_native_cos_f32(x);
-}
-__DEVICE__
-inline
-float __sinf(float x) { return __ocml_native_sin_f32(x); }
-__DEVICE__
-inline
-float __tanf(float x) { return __ocml_tan_f32(x); }
-// END INTRINSICS
-// END FLOAT
-
-// BEGIN DOUBLE
-__DEVICE__
-inline
-double abs(double x) { return __ocml_fabs_f64(x); }
-__DEVICE__
-inline
-double acos(double x) { return __ocml_acos_f64(x); }
-__DEVICE__
-inline
-double acosh(double x) { return __ocml_acosh_f64(x); }
-__DEVICE__
-inline
-double asin(double x) { return __ocml_asin_f64(x); }
-__DEVICE__
-inline
-double asinh(double x) { return __ocml_asinh_f64(x); }
-__DEVICE__
-inline
-double atan(double x) { return __ocml_atan_f64(x); }
-__DEVICE__
-inline
-double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
-__DEVICE__
-inline
-double atanh(double x) { return __ocml_atanh_f64(x); }
-__DEVICE__
-inline
-double cbrt(double x) { return __ocml_cbrt_f64(x); }
-__DEVICE__
-inline
-double ceil(double x) { return __ocml_ceil_f64(x); }
-__DEVICE__
-inline
-double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
-__DEVICE__
-inline
-double cos(double x) { return __ocml_cos_f64(x); }
-__DEVICE__
-inline
-double cosh(double x) { return __ocml_cosh_f64(x); }
-__DEVICE__
-inline
-double cospi(double x) { return __ocml_cospi_f64(x); }
-__DEVICE__
-inline
-double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
-__DEVICE__
-inline
-double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
-__DEVICE__
-inline
-double erf(double x) { return __ocml_erf_f64(x); }
-__DEVICE__
-inline
-double erfc(double x) { return __ocml_erfc_f64(x); }
-__DEVICE__
-inline
-double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
-__DEVICE__
-inline
-double erfcx(double x) { return __ocml_erfcx_f64(x); }
-__DEVICE__
-inline
-double erfinv(double x) { return __ocml_erfinv_f64(x); }
-__DEVICE__
-inline
-double exp(double x) { return __ocml_exp_f64(x); }
-__DEVICE__
-inline
-double exp10(double x) { return __ocml_exp10_f64(x); }
-__DEVICE__
-inline
-double exp2(double x) { return __ocml_exp2_f64(x); }
-__DEVICE__
-inline
-double expm1(double x) { return __ocml_expm1_f64(x); }
-__DEVICE__
-inline
-double fabs(double x) { return __ocml_fabs_f64(x); }
-__DEVICE__
-inline
-double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
-__DEVICE__
-inline
-double floor(double x) { return __ocml_floor_f64(x); }
-__DEVICE__
-inline
-double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
-__DEVICE__
-inline
-double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
-__DEVICE__
-inline
-double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
-__DEVICE__
-inline
-double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
-__DEVICE__
-inline
-double frexp(double x, int* nptr)
-{
- int tmp;
- double r =
- __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
- *nptr = tmp;
-
- return r;
-}
-__DEVICE__
-inline
-double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
-__DEVICE__
-inline
-int ilogb(double x) { return __ocml_ilogb_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
-__DEVICE__
-inline
-double j0(double x) { return __ocml_j0_f64(x); }
-__DEVICE__
-inline
-double j1(double x) { return __ocml_j1_f64(x); }
-__DEVICE__
-inline
-double jn(int n, double x)
-{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
- // for linear recurrences to get O(log n) steps, but it's unclear if
- // it'd be beneficial in this case. Placeholder until OCML adds
- // support.
- if (n == 0) return j0f(x);
- if (n == 1) return j1f(x);
-
- double x0 = j0f(x);
- double x1 = j1f(x);
- for (int i = 1; i < n; ++i) {
- double x2 = (2 * i) / x * x1 - x0;
- x0 = x1;
- x1 = x2;
- }
-
- return x1;
-}
-__DEVICE__
-inline
-double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
-__DEVICE__
-inline
-double lgamma(double x) { return __ocml_lgamma_f64(x); }
-__DEVICE__
-inline
-long long int llrint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-long long int llround(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double log(double x) { return __ocml_log_f64(x); }
-__DEVICE__
-inline
-double log10(double x) { return __ocml_log10_f64(x); }
-__DEVICE__
-inline
-double log1p(double x) { return __ocml_log1p_f64(x); }
-__DEVICE__
-inline
-double log2(double x) { return __ocml_log2_f64(x); }
-__DEVICE__
-inline
-double logb(double x) { return __ocml_logb_f64(x); }
-__DEVICE__
-inline
-long int lrint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-long int lround(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double modf(double x, double* iptr)
-{
- double tmp;
- double r =
- __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
- *iptr = tmp;
-
- return r;
-}
-__DEVICE__
-inline
-double nan(const char* tagp)
-{
-#if !_WIN32
- union {
- double val;
- struct ieee_double {
- uint64_t mantissa : 51;
- uint32_t quiet : 1;
- uint32_t exponent : 11;
- uint32_t sign : 1;
- } bits;
- static_assert(sizeof(double) == sizeof(ieee_double), "");
- } tmp;
-
- tmp.bits.sign = 0u;
- tmp.bits.exponent = ~0u;
- tmp.bits.quiet = 1u;
- tmp.bits.mantissa = __make_mantissa(tagp);
-
- return tmp.val;
-#else
- static_assert(sizeof(uint64_t)==sizeof(double));
- uint64_t val = __make_mantissa(tagp);
- val |= 0xFFF << 51;
- return *reinterpret_cast<double*>(&val);
-#endif
-}
-__DEVICE__
-inline
-double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
-__DEVICE__
-inline
-double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
-__DEVICE__
-inline
-double norm(int dim, const double* a)
-{ // TODO: placeholder until OCML adds support.
- double r = 0;
- while (dim--) { r += a[0] * a[0]; ++a; }
-
- return __ocml_sqrt_f64(r);
-}
-__DEVICE__
-inline
-double norm3d(double x, double y, double z)
-{
- return __ocml_len3_f64(x, y, z);
-}
-__DEVICE__
-inline
-double norm4d(double x, double y, double z, double w)
-{
- return __ocml_len4_f64(x, y, z, w);
-}
-__DEVICE__
-inline
-double normcdf(double x) { return __ocml_ncdf_f64(x); }
-__DEVICE__
-inline
-double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
-__DEVICE__
-inline
-double pow(double x, double y) { return __ocml_pow_f64(x, y); }
-__DEVICE__
-inline
-double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); }
-__DEVICE__
-inline
-double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
-__DEVICE__
-inline
-double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
-__DEVICE__
-inline
-double remquo(double x, double y, int* quo)
-{
- int tmp;
- double r =
- __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
- *quo = tmp;
-
- return r;
-}
-__DEVICE__
-inline
-double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
-__DEVICE__
-inline
-double rint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-double rnorm(int dim, const double* a)
-{ // TODO: placeholder until OCML adds support.
- double r = 0;
- while (dim--) { r += a[0] * a[0]; ++a; }
-
- return __ocml_rsqrt_f64(r);
-}
-__DEVICE__
-inline
-double rnorm3d(double x, double y, double z)
-{
- return __ocml_rlen3_f64(x, y, z);
-}
-__DEVICE__
-inline
-double rnorm4d(double x, double y, double z, double w)
-{
- return __ocml_rlen4_f64(x, y, z, w);
-}
-__DEVICE__
-inline
-double round(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
-__DEVICE__
-inline
-double scalbln(double x, long int n)
-{
- return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
-}
-__DEVICE__
-inline
-double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
-__DEVICE__
-inline
-__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
-__DEVICE__
-inline
-double sin(double x) { return __ocml_sin_f64(x); }
-__DEVICE__
-inline
-void sincos(double x, double* sptr, double* cptr)
-{
- double tmp;
- *sptr =
- __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
- *cptr = tmp;
-}
-__DEVICE__
-inline
-void sincospi(double x, double* sptr, double* cptr)
-{
- double tmp;
- *sptr = __ocml_sincospi_f64(
- x, (__attribute__((address_space(5))) double*) &tmp);
- *cptr = tmp;
-}
-__DEVICE__
-inline
-double sinh(double x) { return __ocml_sinh_f64(x); }
-__DEVICE__
-inline
-double sinpi(double x) { return __ocml_sinpi_f64(x); }
-__DEVICE__
-inline
-double sqrt(double x) { return __ocml_sqrt_f64(x); }
-__DEVICE__
-inline
-double tan(double x) { return __ocml_tan_f64(x); }
-__DEVICE__
-inline
-double tanh(double x) { return __ocml_tanh_f64(x); }
-__DEVICE__
-inline
-double tgamma(double x) { return __ocml_tgamma_f64(x); }
-__DEVICE__
-inline
-double trunc(double x) { return __ocml_trunc_f64(x); }
-__DEVICE__
-inline
-double y0(double x) { return __ocml_y0_f64(x); }
-__DEVICE__
-inline
-double y1(double x) { return __ocml_y1_f64(x); }
-__DEVICE__
-inline
-double yn(int n, double x)
-{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
- // for linear recurrences to get O(log n) steps, but it's unclear if
- // it'd be beneficial in this case. Placeholder until OCML adds
- // support.
- if (n == 0) return j0f(x);
- if (n == 1) return j1f(x);
-
- double x0 = j0f(x);
- double x1 = j1f(x);
- for (int i = 1; i < n; ++i) {
- double x2 = (2 * i) / x * x1 - x0;
- x0 = x1;
- x1 = x2;
- }
-
- return x1;
-}
-
-// BEGIN INTRINSICS
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dadd_rn(double x, double y) { return x + y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __ddiv_rn(double x, double y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dmul_rn(double x, double y) { return x * y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
-#endif
-__DEVICE__
-inline
-double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
-__DEVICE__
-inline
-double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
-__DEVICE__
-inline
-double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
-#endif
-__DEVICE__
-inline
-double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
-__DEVICE__
-inline
-double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
-__DEVICE__
-inline
-double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dsub_rn(double x, double y) { return x - y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __fma_rd(double x, double y, double z)
-{
- return __ocml_fma_rtn_f64(x, y, z);
-}
-#endif
-__DEVICE__
-inline
-double __fma_rn(double x, double y, double z)
-{
- return __ocml_fma_f64(x, y, z);
-}
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __fma_ru(double x, double y, double z)
-{
- return __ocml_fma_rtp_f64(x, y, z);
-}
-__DEVICE__
-inline
-double __fma_rz(double x, double y, double z)
-{
- return __ocml_fma_rtz_f64(x, y, z);
-}
-#endif
-// END INTRINSICS
-// END DOUBLE
-
-// BEGIN INTEGER
-__DEVICE__
-inline
-int abs(int x)
-{
- int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
- return (x ^ sgn) - sgn;
-}
-__DEVICE__
-inline
-long labs(long x)
-{
- long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
- return (x ^ sgn) - sgn;
-}
-__DEVICE__
-inline
-long long llabs(long long x)
-{
- long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
- return (x ^ sgn) - sgn;
-}
-
-#if defined(__cplusplus)
- __DEVICE__
- inline
- long abs(long x) { return labs(x); }
- __DEVICE__
- inline
- long long abs(long long x) { return llabs(x); }
-#endif
-// END INTEGER
-
-__DEVICE__
-inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
- return __ocml_fma_f16(x, y, z);
-}
-
-__DEVICE__
-inline float fma(float x, float y, float z) {
- return fmaf(x, y, z);
-}
-
-#pragma push_macro("__DEF_FLOAT_FUN")
-#pragma push_macro("__DEF_FLOAT_FUN2")
-#pragma push_macro("__DEF_FLOAT_FUN2I")
-#pragma push_macro("__HIP_OVERLOAD")
-#pragma push_macro("__HIP_OVERLOAD2")
-
-// __hip_enable_if::type is a type function which returns __T if __B is true.
-template<bool __B, class __T = void>
-struct __hip_enable_if {};
-
-template <class __T> struct __hip_enable_if<true, __T> {
- typedef __T type;
-};
-
-// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
-// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
-// floor(double).
-#define __HIP_OVERLOAD1(__retty, __fn) \
- template <typename __T> \
- __DEVICE__ \
- typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \
- __retty>::type \
- __fn(__T __x) { \
- return ::__fn((double)__x); \
- }
-
-// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
-// or integer argument to avoid compilation error due to ambibuity. e.g.
-// max(5.0f, 6.0) is resolved with max(double, double).
-#define __HIP_OVERLOAD2(__retty, __fn) \
- template <typename __T1, typename __T2> \
- __DEVICE__ typename __hip_enable_if< \
- std::numeric_limits<__T1>::is_specialized && \
- std::numeric_limits<__T2>::is_specialized, \
- __retty>::type \
- __fn(__T1 __x, __T2 __y) { \
- return __fn((double)__x, (double)__y); \
- }
-
-// Define cmath functions with float argument and returns float.
-#define __DEF_FUN1(retty, func) \
-__DEVICE__ \
-inline \
-float func(float x) \
-{ \
- return func##f(x); \
-} \
-__HIP_OVERLOAD1(retty, func)
-
-// Define cmath functions with float argument and returns retty.
-#define __DEF_FUNI(retty, func) \
-__DEVICE__ \
-inline \
-retty func(float x) \
-{ \
- return func##f(x); \
-} \
-__HIP_OVERLOAD1(retty, func)
-
-// define cmath functions with two float arguments.
-#define __DEF_FUN2(retty, func) \
-__DEVICE__ \
-inline \
-float func(float x, float y) \
-{ \
- return func##f(x, y); \
-} \
-__HIP_OVERLOAD2(retty, func)
-
-__DEF_FUN1(double, acos)
-__DEF_FUN1(double, acosh)
-__DEF_FUN1(double, asin)
-__DEF_FUN1(double, asinh)
-__DEF_FUN1(double, atan)
-__DEF_FUN2(double, atan2);
-__DEF_FUN1(double, atanh)
-__DEF_FUN1(double, cbrt)
-__DEF_FUN1(double, ceil)
-__DEF_FUN2(double, copysign);
-__DEF_FUN1(double, cos)
-__DEF_FUN1(double, cosh)
-__DEF_FUN1(double, erf)
-__DEF_FUN1(double, erfc)
-__DEF_FUN1(double, exp)
-__DEF_FUN1(double, exp2)
-__DEF_FUN1(double, expm1)
-__DEF_FUN1(double, fabs)
-__DEF_FUN2(double, fdim);
-__DEF_FUN1(double, floor)
-__DEF_FUN2(double, fmax);
-__DEF_FUN2(double, fmin);
-__DEF_FUN2(double, fmod);
-//__HIP_OVERLOAD1(int, fpclassify)
-__DEF_FUN2(double, hypot);
-__DEF_FUNI(int, ilogb)
-__HIP_OVERLOAD1(bool, isfinite)
-__HIP_OVERLOAD2(bool, isgreater);
-__HIP_OVERLOAD2(bool, isgreaterequal);
-__HIP_OVERLOAD1(bool, isinf);
-__HIP_OVERLOAD2(bool, isless);
-__HIP_OVERLOAD2(bool, islessequal);
-__HIP_OVERLOAD2(bool, islessgreater);
-__HIP_OVERLOAD1(bool, isnan);
-//__HIP_OVERLOAD1(bool, isnormal)
-__HIP_OVERLOAD2(bool, isunordered);
-__DEF_FUN1(double, lgamma)
-__DEF_FUN1(double, log)
-__DEF_FUN1(double, log10)
-__DEF_FUN1(double, log1p)
-__DEF_FUN1(double, log2)
-__DEF_FUN1(double, logb)
-__DEF_FUNI(long long, llrint)
-__DEF_FUNI(long long, llround)
-__DEF_FUNI(long, lrint)
-__DEF_FUNI(long, lround)
-__DEF_FUN1(double, nearbyint);
-__DEF_FUN2(double, nextafter);
-__DEF_FUN2(double, pow);
-__DEF_FUN2(double, remainder);
-__DEF_FUN1(double, rint);
-__DEF_FUN1(double, round);
-__HIP_OVERLOAD1(bool, signbit)
-__DEF_FUN1(double, sin)
-__DEF_FUN1(double, sinh)
-__DEF_FUN1(double, sqrt)
-__DEF_FUN1(double, tan)
-__DEF_FUN1(double, tanh)
-__DEF_FUN1(double, tgamma)
-__DEF_FUN1(double, trunc);
-
-// define cmath functions with a float and an integer argument.
-#define __DEF_FLOAT_FUN2I(func) \
-__DEVICE__ \
-inline \
-float func(float x, int y) \
-{ \
- return func##f(x, y); \
-}
-__DEF_FLOAT_FUN2I(scalbn)
-__DEF_FLOAT_FUN2I(ldexp)
-
-template<class T>
-__DEVICE__ inline T min(T arg1, T arg2) {
- return (arg1 < arg2) ? arg1 : arg2;
-}
-
-template<class T>
-__DEVICE__ inline T max(T arg1, T arg2) {
- return (arg1 > arg2) ? arg1 : arg2;
-}
-
-#if __HCC__
-
-__DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
- return min(arg1, (uint32_t) arg2);
-}
-/*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
- return min((uint32_t) arg1, arg2);
-}
-
-__DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
- return min(arg1, (uint64_t) arg2);
-}
-__DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
- return min((uint64_t) arg1, arg2);
-}
-
-__DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
- return min(arg1, (unsigned long long) arg2);
-}
-__DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
- return min((unsigned long long) arg1, arg2);
-}*/
-
-__DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
- return max(arg1, (uint32_t) arg2);
-}
-__DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
- return max((uint32_t) arg1, arg2);
-}
-
-/*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
- return max(arg1, (uint64_t) arg2);
-}
-__DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
- return max((uint64_t) arg1, arg2);
-}
-
-__DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
- return max(arg1, (unsigned long long) arg2);
-}
-__DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
- return max((unsigned long long) arg1, arg2);
-}*/
-#else
-__DEVICE__ inline int min(int arg1, int arg2) {
- return (arg1 < arg2) ? arg1 : arg2;
-}
-__DEVICE__ inline int max(int arg1, int arg2) {
- return (arg1 > arg2) ? arg1 : arg2;
-}
-
-__DEVICE__ inline int min(uint32_t arg1, int arg2) {
- return (arg1 < arg2) ? arg1 : arg2;
-}
-__DEVICE__ inline int max(uint32_t arg1, int arg2) {
- return (arg1 > arg2) ? arg1 : arg2;
-}
-
-__DEVICE__
-inline
-float max(float x, float y) {
- return fmaxf(x, y);
-}
-
-__DEVICE__
-inline
-double max(double x, double y) {
- return fmax(x, y);
-}
-
-__DEVICE__
-inline
-float min(float x, float y) {
- return fminf(x, y);
-}
-
-__DEVICE__
-inline
-double min(double x, double y) {
- return fmin(x, y);
-}
-
-__HIP_OVERLOAD2(double, max)
-__HIP_OVERLOAD2(double, min)
-
-#endif
-
-__host__ inline static int min(int arg1, int arg2) {
- return std::min(arg1, arg2);
-}
-
-__host__ inline static int max(int arg1, int arg2) {
- return std::max(arg1, arg2);
-}
-
-__DEVICE__
-inline float pow(float base, int iexp) {
- return powif(base, iexp);
-}
-
-__DEVICE__
-inline double pow(double base, int iexp) {
- return powi(base, iexp);
-}
-
-__DEVICE__
-inline _Float16 pow(_Float16 base, int iexp) {
- return __ocml_pown_f16(base, iexp);
-}
-
-#pragma pop_macro("__DEF_FLOAT_FUN")
-#pragma pop_macro("__DEF_FLOAT_FUN2")
-#pragma pop_macro("__DEF_FLOAT_FUN2I")
-#pragma pop_macro("__HIP_OVERLOAD")
-#pragma pop_macro("__HIP_OVERLOAD2")
-
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-#pragma pop_macro("__DEVICE__")
-#pragma pop_macro("__RETURN_TYPE")
-
-// For backward compatibility.
-// There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
-// defined after including math_functions.h.
-#include <hip/hcc_detail/hip_runtime.h>
diff --git a/third_party/rocm/include/hip/hcc_detail/math_fwd.h b/third_party/rocm/include/hip/hcc_detail/math_fwd.h
deleted file mode 100644
index c197af8..0000000
--- a/third_party/rocm/include/hip/hcc_detail/math_fwd.h
+++ /dev/null
@@ -1,714 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "host_defines.h"
-#if defined(__cplusplus)
- extern "C" {
-#endif
-
-// DOT FUNCTIONS
-#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
-__device__
-__attribute__((const))
-int __ockl_sdot2(
- HIP_vector_base<short, 2>::Native_vec_,
- HIP_vector_base<short, 2>::Native_vec_,
- int, bool);
-
-__device__
-__attribute__((const))
-unsigned int __ockl_udot2(
- HIP_vector_base<unsigned short, 2>::Native_vec_,
- HIP_vector_base<unsigned short, 2>::Native_vec_,
- unsigned int, bool);
-
-__device__
-__attribute__((const))
-int __ockl_sdot4(
- HIP_vector_base<char, 4>::Native_vec_,
- HIP_vector_base<char, 4>::Native_vec_,
- int, bool);
-
-__device__
-__attribute__((const))
-unsigned int __ockl_udot4(
- HIP_vector_base<unsigned char, 4>::Native_vec_,
- HIP_vector_base<unsigned char, 4>::Native_vec_,
- unsigned int, bool);
-
-__device__
-__attribute__((const))
-int __ockl_sdot8(int, int, int, bool);
-
-__device__
-__attribute__((const))
-unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool);
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-// BEGIN FLOAT
-__device__
-__attribute__((const))
-float __ocml_acos_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_acosh_f32(float);
-__device__
-__attribute__((const))
-float __ocml_asin_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_asinh_f32(float);
-__device__
-__attribute__((const))
-float __ocml_atan2_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_atan_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_atanh_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_cbrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_ceil_f32(float);
-__device__
-__attribute__((const))
-__device__
-float __ocml_copysign_f32(float, float);
-__device__
-float __ocml_cos_f32(float);
-__device__
-float __ocml_native_cos_f32(float);
-__device__
-__attribute__((pure))
-__device__
-float __ocml_cosh_f32(float);
-__device__
-float __ocml_cospi_f32(float);
-__device__
-float __ocml_i0_f32(float);
-__device__
-float __ocml_i1_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfc_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfcinv_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfcx_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erf_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfinv_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_exp10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_exp10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_exp2_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_exp_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_exp_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_expm1_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fabs_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fdim_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_floor_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fma_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fmax_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_fmin_f32(float, float);
-__device__
-__attribute__((const))
-__device__
-float __ocml_fmod_f32(float, float);
-__device__
-float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-float __ocml_hypot_f32(float, float);
-__device__
-__attribute__((const))
-int __ocml_ilogb_f32(float);
-__device__
-__attribute__((const))
-int __ocml_isfinite_f32(float);
-__device__
-__attribute__((const))
-int __ocml_isinf_f32(float);
-__device__
-__attribute__((const))
-int __ocml_isnan_f32(float);
-__device__
-float __ocml_j0_f32(float);
-__device__
-float __ocml_j1_f32(float);
-__device__
-__attribute__((const))
-float __ocml_ldexp_f32(float, int);
-__device__
-float __ocml_lgamma_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_log10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log1p_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log2_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_log2_f32(float);
-__device__
-__attribute__((const))
-float __ocml_logb_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_log_f32(float);
-__device__
-float __ocml_modf_f32(float, __attribute__((address_space(5))) float*);
-__device__
-__attribute__((const))
-float __ocml_nearbyint_f32(float);
-__device__
-__attribute__((const))
-float __ocml_nextafter_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_len3_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_len4_f32(float, float, float, float);
-__device__
-__attribute__((pure))
-float __ocml_ncdf_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_ncdfinv_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_pow_f32(float, float);
-__device__
-__attribute__((pure))
-float __ocml_pown_f32(float, int);
-__device__
-__attribute__((pure))
-float __ocml_rcbrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_remainder_f32(float, float);
-__device__
-float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-float __ocml_rhypot_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_rint_f32(float);
-__device__
-__attribute__((const))
-float __ocml_rlen3_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_rlen4_f32(float, float, float, float);
-__device__
-__attribute__((const))
-float __ocml_round_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_rsqrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_scalb_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_scalbn_f32(float, int);
-__device__
-__attribute__((const))
-int __ocml_signbit_f32(float);
-__device__
-float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*);
-__device__
-float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*);
-__device__
-float __ocml_sin_f32(float);
-__device__
-float __ocml_native_sin_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_sinh_f32(float);
-__device__
-float __ocml_sinpi_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_native_sqrt_f32(float);
-__device__
-float __ocml_tan_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_tanh_f32(float);
-__device__
-float __ocml_tgamma_f32(float);
-__device__
-__attribute__((const))
-float __ocml_trunc_f32(float);
-__device__
-float __ocml_y0_f32(float);
-__device__
-float __ocml_y1_f32(float);
-
-// BEGIN INTRINSICS
-__device__
-__attribute__((const))
-float __ocml_add_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_add_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_add_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_add_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rte_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rtn_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rtp_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rtz_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fma_rte_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fma_rtn_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fma_rtp_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fma_rtz_f32(float, float, float);
-
-__device__
-__attribute__((const))
-float __llvm_amdgcn_cos_f32(float) __asm("llvm.amdgcn.cos.f32");
-__device__
-__attribute__((const))
-float __llvm_amdgcn_rcp_f32(float) __asm("llvm.amdgcn.rcp.f32");
-__device__
-__attribute__((const))
-float __llvm_amdgcn_rsq_f32(float) __asm("llvm.amdgcn.rsq.f32");
-__device__
-__attribute__((const))
-float __llvm_amdgcn_sin_f32(float) __asm("llvm.amdgcn.sin.f32");
-// END INTRINSICS
-// END FLOAT
-
-// BEGIN DOUBLE
-__device__
-__attribute__((const))
-double __ocml_acos_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_acosh_f64(double);
-__device__
-__attribute__((const))
-double __ocml_asin_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_asinh_f64(double);
-__device__
-__attribute__((const))
-double __ocml_atan2_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_atan_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_atanh_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_cbrt_f64(double);
-__device__
-__attribute__((const))
-double __ocml_ceil_f64(double);
-__device__
-__attribute__((const))
-double __ocml_copysign_f64(double, double);
-__device__
-double __ocml_cos_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_cosh_f64(double);
-__device__
-double __ocml_cospi_f64(double);
-__device__
-double __ocml_i0_f64(double);
-__device__
-double __ocml_i1_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfc_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfcinv_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfcx_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erf_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfinv_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_exp10_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_exp2_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_exp_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_expm1_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fabs_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fdim_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_floor_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fma_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fmax_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_fmin_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_fmod_f64(double, double);
-__device__
-double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-double __ocml_hypot_f64(double, double);
-__device__
-__attribute__((const))
-int __ocml_ilogb_f64(double);
-__device__
-__attribute__((const))
-int __ocml_isfinite_f64(double);
-__device__
-__attribute__((const))
-int __ocml_isinf_f64(double);
-__device__
-__attribute__((const))
-int __ocml_isnan_f64(double);
-__device__
-double __ocml_j0_f64(double);
-__device__
-double __ocml_j1_f64(double);
-__device__
-__attribute__((const))
-double __ocml_ldexp_f64(double, int);
-__device__
-double __ocml_lgamma_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log10_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log1p_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log2_f64(double);
-__device__
-__attribute__((const))
-double __ocml_logb_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log_f64(double);
-__device__
-double __ocml_modf_f64(double, __attribute__((address_space(5))) double*);
-__device__
-__attribute__((const))
-double __ocml_nearbyint_f64(double);
-__device__
-__attribute__((const))
-double __ocml_nextafter_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_len3_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_len4_f64(double, double, double, double);
-__device__
-__attribute__((pure))
-double __ocml_ncdf_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_ncdfinv_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_pow_f64(double, double);
-__device__
-__attribute__((pure))
-double __ocml_pown_f64(double, int);
-__device__
-__attribute__((pure))
-double __ocml_rcbrt_f64(double);
-__device__
-__attribute__((const))
-double __ocml_remainder_f64(double, double);
-__device__
-double __ocml_remquo_f64(
- double, double, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-double __ocml_rhypot_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_rint_f64(double);
-__device__
-__attribute__((const))
-double __ocml_rlen3_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_rlen4_f64(double, double, double, double);
-__device__
-__attribute__((const))
-double __ocml_round_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_rsqrt_f64(double);
-__device__
-__attribute__((const))
-double __ocml_scalb_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_scalbn_f64(double, int);
-__device__
-__attribute__((const))
-int __ocml_signbit_f64(double);
-__device__
-double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*);
-__device__
-double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*);
-__device__
-double __ocml_sin_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_sinh_f64(double);
-__device__
-double __ocml_sinpi_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_f64(double);
-__device__
-double __ocml_tan_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_tanh_f64(double);
-__device__
-double __ocml_tgamma_f64(double);
-__device__
-__attribute__((const))
-double __ocml_trunc_f64(double);
-__device__
-double __ocml_y0_f64(double);
-__device__
-double __ocml_y1_f64(double);
-
-// BEGIN INTRINSICS
-__device__
-__attribute__((const))
-double __ocml_add_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_add_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_add_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_add_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rte_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rtn_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rtp_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rtz_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fma_rte_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fma_rtn_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fma_rtp_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fma_rtz_f64(double, double, double);
-
-__device__
-__attribute__((const))
-double __llvm_amdgcn_rcp_f64(double) __asm("llvm.amdgcn.rcp.f64");
-__device__
-__attribute__((const))
-double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64");
-// END INTRINSICS
-// END DOUBLE
-
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-#if defined(__cplusplus)
- } // extern "C"
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/ockl_image.h b/third_party/rocm/include/hip/hcc_detail/ockl_image.h
deleted file mode 100644
index b32b23f..0000000
--- a/third_party/rocm/include/hip/hcc_detail/ockl_image.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include <hip/hip_vector_types.h>
-
-extern "C" {
-
-#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4)))
-
-__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c);
-
-__device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT*i, int c);
-
-__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f);
-
-__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l);
-
-__device__ void __ockl_image_store_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-};
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/hcc_detail/program_state.hpp b/third_party/rocm/include/hip/hcc_detail/program_state.hpp
deleted file mode 100644
index 6128a4c..0000000
--- a/third_party/rocm/include/hip/hcc_detail/program_state.hpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include <hsa/amd_hsa_kernel_code.h>
-#include <hsa/hsa.h>
-#include <hsa/hsa_ext_amd.h>
-#include <hsa/hsa_ven_amd_loader.h>
-
-#include <cstddef>
-#include <cstdint>
-#include <cstdlib>
-
-#include <hip/hip_common.h>
-
-struct ihipModuleSymbol_t;
-using hipFunction_t = ihipModuleSymbol_t*;
-
-namespace hip_impl {
-
-// This section contains internal APIs that
-// needs to be exported
-#ifdef __GNUC__
-#pragma GCC visibility push (default)
-#endif
-
-struct kernarg_impl;
-class kernarg {
-public:
- kernarg();
- kernarg(kernarg&&);
- ~kernarg();
- std::uint8_t* data();
- std::size_t size();
- void reserve(std::size_t);
- void resize(std::size_t);
-private:
- kernarg_impl* impl;
-};
-
-class kernargs_size_align;
-class program_state_impl;
-class program_state {
-public:
- program_state();
- ~program_state();
- program_state(const program_state&) = delete;
-
- hipFunction_t kernel_descriptor(std::uintptr_t,
- hsa_agent_t);
-
- kernargs_size_align get_kernargs_size_align(std::uintptr_t);
- hsa_executable_t load_executable(const char*, const size_t,
- hsa_executable_t,
- hsa_agent_t);
- hsa_executable_t load_executable_no_copy(const char*, const size_t,
- hsa_executable_t,
- hsa_agent_t);
-
- void* global_addr_by_name(const char* name);
-
-private:
- friend class agent_globals_impl;
- program_state_impl* impl;
-};
-
-class kernargs_size_align {
-public:
- std::size_t size(std::size_t n) const;
- std::size_t alignment(std::size_t n) const;
- const void* getHandle() const {return handle;};
-private:
- const void* handle;
- friend kernargs_size_align program_state::get_kernargs_size_align(std::uintptr_t);
-};
-
-#ifdef __GNUC__
-#pragma GCC visibility pop
-#endif
-
-inline
-__attribute__((visibility("hidden")))
-program_state& get_program_state() {
- static program_state ps;
- return ps;
-}
-} // Namespace hip_impl.
diff --git a/third_party/rocm/include/hip/hcc_detail/surface_functions.h b/third_party/rocm/include/hip/hcc_detail/surface_functions.h
deleted file mode 100644
index b9cab1f..0000000
--- a/third_party/rocm/include/hip/hcc_detail/surface_functions.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
-Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H
-
-#include <hip/hcc_detail/hip_surface_types.h>
-
-#define __SURFACE_FUNCTIONS_DECL__ static inline __device__
-template <class T>
-__SURFACE_FUNCTIONS_DECL__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y,
- int boundaryMode = hipBoundaryModeZero) {
- hipArray* arrayPtr = (hipArray*)surfObj;
- size_t width = arrayPtr->width;
- size_t height = arrayPtr->height;
- int32_t xOffset = x / sizeof(T);
- T* dataPtr = (T*)arrayPtr->data;
- if ((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0)) {
- if (boundaryMode == hipBoundaryModeZero) {
- *data = 0;
- }
- } else {
- *data = *(dataPtr + y * width + xOffset);
- }
-}
-
-template <class T>
-__SURFACE_FUNCTIONS_DECL__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y,
- int boundaryMode = hipBoundaryModeZero) {
- hipArray* arrayPtr = (hipArray*)surfObj;
- size_t width = arrayPtr->width;
- size_t height = arrayPtr->height;
- int32_t xOffset = x / sizeof(T);
- T* dataPtr = (T*)arrayPtr->data;
- if (!((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0))) {
- *(dataPtr + y * width + xOffset) = data;
- }
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_fetch_functions.h b/third_party/rocm/include/hip/hcc_detail/texture_fetch_functions.h
deleted file mode 100644
index 03c1780..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_fetch_functions.h
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#if defined(__cplusplus)
-
-#include <hip/hip_vector_types.h>
-#include <hip/texture_types.h>
-#include <hip/hcc_detail/ockl_image.h>
-
-#include <type_traits>
-
-#define TEXTURE_PARAMETERS_INIT \
- unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \
- unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;
-
-template<typename T>
-struct __hip_is_tex_channel_type
-{
- static constexpr bool value =
- std::is_same<T, char>::value ||
- std::is_same<T, unsigned char>::value ||
- std::is_same<T, short>::value ||
- std::is_same<T, unsigned short>::value ||
- std::is_same<T, int>::value ||
- std::is_same<T, unsigned int>::value ||
- std::is_same<T, float>::value;
-};
-
-template<
- typename T,
- unsigned int rank>
-struct __hip_is_tex_channel_type<HIP_vector_type<T, rank>>
-{
- static constexpr bool value =
- __hip_is_tex_channel_type<T>::value &&
- ((rank == 1) ||
- (rank == 2) ||
- (rank == 4));
-};
-
-template<typename T>
-struct __hip_is_tex_normalized_channel_type
-{
- static constexpr bool value =
- std::is_same<T, char>::value ||
- std::is_same<T, unsigned char>::value ||
- std::is_same<T, short>::value ||
- std::is_same<T, unsigned short>::value;
-};
-
-template<
- typename T,
- unsigned int rank>
-struct __hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>
-{
- static constexpr bool value =
- __hip_is_tex_normalized_channel_type<T>::value &&
- ((rank == 1) ||
- (rank == 2) ||
- (rank == 4));
-};
-
-template <
- typename T,
- hipTextureReadMode readMode,
- typename Enable = void>
-struct __hip_tex_ret
-{
- static_assert(std::is_same<Enable, void>::value, "Invalid channel type!");
-};
-
-template <
- typename T,
- hipTextureReadMode readMode>
-using __hip_tex_ret_t = typename __hip_tex_ret<T, readMode, bool>::type;
-
-template <typename T>
-struct __hip_tex_ret<
- T,
- hipReadModeElementType,
- typename std::enable_if<__hip_is_tex_channel_type<T>::value, bool>::type>
-{
- using type = T;
-};
-
-template<
- typename T,
- unsigned int rank>
-struct __hip_tex_ret<
- HIP_vector_type<T, rank>,
- hipReadModeElementType,
- typename std::enable_if<__hip_is_tex_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
-{
- using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeElementType>, rank>;
-};
-
-template<typename T>
-struct __hip_tex_ret<
- T,
- hipReadModeNormalizedFloat,
- typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type>
-{
- using type = float;
-};
-
-template<
- typename T,
- unsigned int rank>
-struct __hip_tex_ret<
- HIP_vector_type<T, rank>,
- hipReadModeNormalizedFloat,
- typename std::enable_if<__hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
-{
- using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeNormalizedFloat>, rank>;
-};
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1Dfetch(texture<T, hipTextureType1D, readMode> t, int x)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_load_1Db(i, x);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1D(texture<T, hipTextureType1D, readMode> t, float x)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_1D(i, s, x);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2D(texture<T, hipTextureType2D, readMode> t, float x, float y)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayered(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayered(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3D(texture<T, hipTextureType3D, readMode> t, float x, float y, float z)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemap(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLod(texture<T, hipTextureType1D, readMode> t, float x, float level)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_lod_1D(i, s, x, level);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLod(texture<T, hipTextureType2D, readMode> t, float x, float y, float level)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayeredLod(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float level)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayeredLod(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float level)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3DLod(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float level)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLod(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float level)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayered(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayeredLod(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float level)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapGrad(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
- TEXTURE_PARAMETERS_INIT;
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
- return {};
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayeredGrad(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
-{
- TEXTURE_PARAMETERS_INIT;
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
- return {};
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DGrad(texture<T, hipTextureType1D, readMode> t, float x, float dPdx, float dPdy)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DGrad(texture<T, hipTextureType2D, readMode> t, float x, float y, float2 dPdx, float2 dPdy)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayeredGrad(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float dPdx, float dPdy)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayeredGrad(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3DGrad(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
- TEXTURE_PARAMETERS_INIT;
- auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <
- typename T,
- hipTextureReadMode readMode,
- typename Enable = void>
-struct __hip_tex2dgather_ret
-{
- static_assert(std::is_same<Enable, void>::value, "Invalid channel type!");
-};
-
-template <
- typename T,
- hipTextureReadMode readMode>
-using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret<T, readMode, bool>::type;
-
-template <typename T>
-struct __hip_tex2dgather_ret<
- T,
- hipReadModeElementType,
- typename std::enable_if<__hip_is_tex_channel_type<T>::value, bool>::type>
-{
- using type = HIP_vector_type<T, 4>;
-};
-
-template<
- typename T,
- unsigned int rank>
-struct __hip_tex2dgather_ret<
- HIP_vector_type<T, rank>,
- hipReadModeElementType,
- typename std::enable_if<__hip_is_tex_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
-{
- using type = HIP_vector_type<T, 4>;
-};
-
-template <typename T>
-struct __hip_tex2dgather_ret<
- T,
- hipReadModeNormalizedFloat,
- typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type>
-{
- using type = float4;
-};
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex2dgather_ret_t<T, readMode> tex2Dgather(texture<T, hipTextureType2D, readMode> t, float x, float y, int comp=0)
-{
- TEXTURE_PARAMETERS_INIT;
- switch (comp) {
- case 1: {
- auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
- }
- case 2: {
- auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
- }
- case 3: {
- auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
- }
- default: {
- auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
- }
- }
- return {};
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_functions.h b/third_party/rocm/include/hip/hcc_detail/texture_functions.h
deleted file mode 100644
index 4a84507..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_functions.h
+++ /dev/null
@@ -1,11102 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H
-#include <hip/hcc_detail/hip_vector_types.h>
-#include <hip/hcc_detail/hip_texture_types.h>
-
-#pragma push_macro("TYPEDEF_VECTOR_VALUE_TYPE")
-#define TYPEDEF_VECTOR_VALUE_TYPE(SCALAR_TYPE) \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##2_vector_value_type __attribute__((ext_vector_type(2))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##3_vector_value_type __attribute__((ext_vector_type(3))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##4_vector_value_type __attribute__((ext_vector_type(4))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##8_vector_value_type __attribute__((ext_vector_type(8))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##16_vector_value_type __attribute__((ext_vector_type(16)));
-
-TYPEDEF_VECTOR_VALUE_TYPE(float);
-TYPEDEF_VECTOR_VALUE_TYPE(int);
-TYPEDEF_VECTOR_VALUE_TYPE(uint);
-
-#undef TYPEDEF_VECTOR_VALUE_TYPE
-#pragma pop_macro("TYPEDEF_VECTOR_VALUE_TYPE")
-
-union TData {
- __hip_float4_vector_value_type f;
- __hip_int4_vector_value_type i;
- __hip_uint4_vector_value_type u;
-};
-
-#define __TEXTURE_FUNCTIONS_DECL__ static inline __device__
-
-
-#if (__hcc_workweek__ >= 18114) || __clang__
-#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4)))
-#else
-#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(2)))
-#endif
-
-#define TEXTURE_PARAMETERS_INIT \
- unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \
- unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \
- TData texel;
-#define TEXTURE_REF_PARAMETERS_INIT \
- unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)texRef.textureObject; \
- unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \
- TData texel;
-#define TEXTURE_SET_FLOAT *retVal = texel.f.x;
-
-#define TEXTURE_SET_SIGNED *retVal = texel.i.x;
-
-#define TEXTURE_SET_UNSIGNED *retVal = texel.u.x;
-
-#define TEXTURE_SET_FLOAT_X retVal->x = texel.f.x;
-
-#define TEXTURE_SET_SIGNED_X retVal->x = texel.i.x;
-
-#define TEXTURE_SET_UNSIGNED_X retVal->x = texel.u.x;
-
-#define TEXTURE_SET_FLOAT_XY \
- retVal->x = texel.f.x; \
- retVal->y = texel.f.y;
-
-#define TEXTURE_SET_SIGNED_XY \
- retVal->x = texel.i.x; \
- retVal->y = texel.i.y;
-
-#define TEXTURE_SET_UNSIGNED_XY \
- retVal->x = texel.u.x; \
- retVal->y = texel.u.y;
-
-#define TEXTURE_SET_FLOAT_XYZW \
- retVal->x = texel.f.x; \
- retVal->y = texel.f.y; \
- retVal->z = texel.f.z; \
- retVal->w = texel.f.w;
-
-#define TEXTURE_SET_SIGNED_XYZW \
- retVal->x = texel.i.x; \
- retVal->y = texel.i.y; \
- retVal->z = texel.i.z; \
- retVal->w = texel.i.w;
-
-#define TEXTURE_SET_UNSIGNED_XYZW \
- retVal->x = texel.u.x; \
- retVal->y = texel.u.y; \
- retVal->z = texel.u.z; \
- retVal->w = texel.u.w;
-
-#define TEXTURE_RETURN_CHAR return texel.i.x;
-
-#define TEXTURE_RETURN_UCHAR return texel.u.x;
-
-#define TEXTURE_RETURN_SHORT return texel.i.x;
-
-#define TEXTURE_RETURN_USHORT return texel.u.x;
-
-#define TEXTURE_RETURN_INT return texel.i.x;
-
-#define TEXTURE_RETURN_UINT return texel.u.x;
-
-#define TEXTURE_RETURN_SIGNED return texel.i.x;
-
-#define TEXTURE_RETURN_UNSIGNED return texel.u.x;
-
-#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x);
-
-#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x);
-
-#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x);
-
-#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x);
-
-#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x);
-
-#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x);
-
-#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y);
-
-#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y);
-
-#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y);
-
-#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y);
-
-#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y);
-
-#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y);
-
-#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
-
-#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
-
-#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
-
-#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
-
-#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
-
-#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
-
-#define TEXTURE_RETURN_FLOAT return texel.f.x;
-
-#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x);
-
-#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y);
-
-#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
-
-extern "C" {
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_1D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- float c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_1Da(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_2D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c);
-
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_2Da(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c);
-
-__device__
-float __ockl_image_sample_2Dad(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c);
-
-__device__
-float __ockl_image_sample_2Dd(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_3D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_1D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- float c, float dx, float dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_1Da(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c, float dx, float dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_2D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_2Da(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-float __ockl_image_sample_grad_2Dad(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-float __ockl_image_sample_grad_2Dd(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_3D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c, __hip_float4_vector_value_type dx, __hip_float4_vector_value_type dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_1D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- float c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_1Da(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_2D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_2Da(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c, float l);
-
-__device__
-float __ockl_image_sample_lod_2Dad(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c, float l);
-
-__device__
-float __ockl_image_sample_lod_2Dd(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float2_vector_value_type c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_3D(
- unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
- __hip_float4_vector_value_type c, float l);
-}
-
-////////////////////////////////////////////////////////////
-// Texture object APIs
-////////////////////////////////////////////////////////////
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char* retVal, hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char1* retVal, hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char2* retVal, hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char4* retVal, hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned char* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar1* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar2* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar4* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short* retVal, hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short1* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short2* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short4* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned short* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort1* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort2* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort4* retVal, hipTextureObject_t textureObject,
- int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int* retVal, hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int1* retVal, hipTextureObject_t textureObject, int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int2* retVal, hipTextureObject_t textureObject, int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int4* retVal, hipTextureObject_t textureObject, int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned int* retVal, hipTextureObject_t textureObject,
- int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint1* retVal, hipTextureObject_t textureObject, int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint2* retVal, hipTextureObject_t textureObject, int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint4* retVal, hipTextureObject_t textureObject, int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float* retVal, hipTextureObject_t textureObject, int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float1* retVal, hipTextureObject_t textureObject,
- int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float2* retVal, hipTextureObject_t textureObject,
- int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float4* retVal, hipTextureObject_t textureObject,
- int x) {  // samples 1D image at int x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <typename T>  // value-returning form: wraps the pointer-taking tex1Dfetch overload for T
-__TEXTURE_FUNCTIONS_DECL__ T tex1Dfetch(hipTextureObject_t textureObject, int x) {
- T fetched;  // left uninitialized; the call below is expected to fill it
- tex1Dfetch(&fetched, textureObject, x);
- return fetched;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char1* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char2* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char4* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned char* retVal, hipTextureObject_t textureObject,
- float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar1* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar2* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar4* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short1* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short2* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short4* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned short* retVal, hipTextureObject_t textureObject,
- float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort1* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort2* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort4* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int1* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int2* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int4* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned int* retVal, hipTextureObject_t textureObject,
- float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint1* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint2* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint4* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float1* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float2* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float4* retVal, hipTextureObject_t textureObject, float x) {  // samples 1D image at float x; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_SET_FLOAT_XYZW;
-}
-template <typename T>  // value-returning form: wraps the pointer-taking tex1D overload for T
-__TEXTURE_FUNCTIONS_DECL__ T tex1D(hipTextureObject_t textureObject, float x) {
- T sampled;  // left uninitialized; the call below is expected to fill it
- tex1D(&sampled, textureObject, x);
- return sampled;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char1* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char2* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char4* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned char* retVal, hipTextureObject_t textureObject,
- float x, float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar1* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar2* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar4* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short1* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short2* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short4* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned short* retVal, hipTextureObject_t textureObject,
- float x, float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort1* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort2* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort4* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int1* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int2* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int4* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint1* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint2* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint4* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float1* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float2* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float4* retVal, hipTextureObject_t textureObject, float x,
- float level) {  // samples 1D image at x with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <typename T>  // value-returning form: wraps the pointer-taking tex1DLod overload for T
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) {
- T sampled;  // left uninitialized; the call below is expected to fill it
- tex1DLod(&sampled, textureObject, x, level);
- return sampled;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char1* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char2* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char4* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned char* retVal, hipTextureObject_t textureObject,
- float x, float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar1* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar2* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar4* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short1* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short2* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short4* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned short* retVal, hipTextureObject_t textureObject,
- float x, float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort1* retVal, hipTextureObject_t textureObject,
- float x, float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort2* retVal, hipTextureObject_t textureObject,
- float x, float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort4* retVal, hipTextureObject_t textureObject,
- float x, float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int1* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int2* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int4* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint1* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint2* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint4* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float1* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float2* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float4* retVal, hipTextureObject_t textureObject, float x,
- float dx, float dy) {  // samples 1D image at x with gradients dx, dy; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>  // value-returning form: wraps the pointer-taking tex1DGrad overload for T
-__TEXTURE_FUNCTIONS_DECL__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- T ret;  // left uninitialized; the call below is expected to fill it
- tex1DGrad(&ret, textureObject, x, dx, dy);  // was tex1DLod, which has no (x, dx, dy) overload among those visible here
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char1* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char2* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char4* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned char* retVal, hipTextureObject_t textureObject,
- float x, float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar1* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar2* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar4* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short1* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short2* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short4* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned short* retVal, hipTextureObject_t textureObject,
- float x, float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort1* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort2* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort4* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int1* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int2* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int4* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint1* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint2* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint4* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float1* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float2* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float4* retVal, hipTextureObject_t textureObject, float x,
- float y) {  // samples 2D image at (x, y); TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <typename T>  // value-returning form: wraps the pointer-taking tex2D overload for T
-__TEXTURE_FUNCTIONS_DECL__ T tex2D(hipTextureObject_t textureObject, float x, float y) {
- T sampled;  // left uninitialized; the call below is expected to fill it
- tex2D(&sampled, textureObject, x, y);
- return sampled;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char1* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char2* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char4* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned char* retVal, hipTextureObject_t textureObject,
- float x, float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar1* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar2* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar4* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short1* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short2* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short4* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned short* retVal, hipTextureObject_t textureObject,
- float x, float y, float level) {  // samples 2D image at (x, y) with the given LOD level; TEXTURE_SET_* presumably stores into *retVal
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort1* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort2* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort4* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int1* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int2* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int4* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint1* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint2* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint4* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float1* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float2* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float4* retVal, hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex2DLod(hipTextureObject_t textureObject, float x, float y,
- float level) {
- T ret;
- tex2DLod(&ret, textureObject, x, y, level);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned char* retVal, hipTextureObject_t textureObject,
- float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned short* retVal, hipTextureObject_t textureObject,
- float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) {
- T ret;
- tex3D(&ret, textureObject, x, y, z);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned char* retVal, hipTextureObject_t textureObject,
- float x, float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned short* retVal, hipTextureObject_t textureObject,
- float x, float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float1* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float2* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float4* retVal, hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z,
- float level) {
- T ret;
- tex3DLod(&ret, textureObject, x, y, z, level);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char1* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char2* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char4* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned char* retVal,
- hipTextureObject_t textureObject, float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar1* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar2* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar4* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short1* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short2* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short4* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned short* retVal,
- hipTextureObject_t textureObject, float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort1* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort2* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort4* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int* retVal, hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int1* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int2* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int4* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint1* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint2* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint4* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float1* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float2* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_FLOAT_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float4* retVal, hipTextureObject_t textureObject,
- float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) {
- T ret;
- tex1DLayered(&ret, textureObject, x, layer);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned char* retVal,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned short* retVal,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned int* retVal,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer,
- float level) {
- T ret;
- tex1DLayeredLod(&ret, textureObject, x, layer, level);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned char* retVal,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned short* retVal,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned int* retVal,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float1* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float2* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float4* retVal, hipTextureObject_t textureObject,
- float x, int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer,
- float dx, float dy) {
- T ret;
- tex1DLayeredGrad(&ret, textureObject, x, layer, dx, dy);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned char* retVal,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned short* retVal,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int* retVal, hipTextureObject_t textureObject, float x,
- float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned int* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y,
- int layer) {
- T ret;
- tex2DLayered(&ret, textureObject, x, y, layer);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned char* retVal,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned short* retVal,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned int* retVal,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float1* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float2* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float4* retVal, hipTextureObject_t textureObject,
- float x, float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- T ret;
- tex2DLayeredLod(&ret, textureObject, x, y, layer, level);
- return ret;
-}
-
-////////////////////////////////////////////////////////////
-// Texture Reference APIs
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture<char, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture<char1, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture<char2, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture<char4, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture<unsigned char, texType, mode> texRef,
- int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture<uchar1, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture<uchar2, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture<uchar4, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture<short, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture<short1, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture<short2, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture<short4, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture<ushort1, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture<unsigned short, texType, mode> texRef,
- int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture<ushort2, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture<ushort4, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture<int1, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture<int, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture<int2, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture<int4, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture<unsigned int, texType, mode> texRef,
- int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1Dfetch(texture<uint1, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture<uint2, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture<uint4, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture<float, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture<float1, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture<float2, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture<float4, texType, mode> texRef, int x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1Dfetch(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, int x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture<char, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture<char1, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1D(texture<char2, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture<char4, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture<unsigned char, texType, mode> texRef,
- float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture<uchar1, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture<uchar2, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture<uchar4, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture<short, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture<short1, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture<short2, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture<short4, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture<unsigned short, texType, mode> texRef,
- float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture<ushort1, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture<ushort2, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1D(texture<ushort4, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture<int, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture<int1, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture<int2, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture<int4, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture<unsigned int, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture<uint1, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture<uint2, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture<uint4, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture<float1, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture<float2, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture<float4, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1D(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1D(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT;
-}
-//////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture<float, texType, mode> texRef, float x) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1D(i, s, x);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture<char, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture<char1, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR_X;
-}
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture<char2, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture<char4, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture<unsigned char, texType, mode> texRef,
- float x, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture<uchar1, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture<uchar2, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture<uchar4, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture<short, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture<short1, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture<short2, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture<short4, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture<unsigned short, texType, mode> texRef,
- float x, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture<ushort1, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture<ushort2, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture<ushort4, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture<int, texType, mode> texRef, float x, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture<int1, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture<int2, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture<int4, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture<unsigned int, texType, mode> texRef,
- float x, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture<uint1, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture<uint2, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture<uint4, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture<float, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture<float1, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture<float2, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture<float4, texType, mode> texRef, float x,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR_X;
-}
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture<char, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture<char1, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture<char2, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture<char4, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture<unsigned char, texType, mode> texRef,
- float x, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture<uchar1, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DGrad(texture<uchar2, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture<uchar4, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture<short, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture<short1, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture<short2, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture<short4, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture<unsigned short, texType, mode> texRef,
- float x, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture<ushort1, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture<ushort2, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture<ushort4, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture<int, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture<int1, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture<int2, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture<int4, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture<unsigned int, texType, mode> texRef,
- float x, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture<uint1, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture<uint2, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture<uint4, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture<float, texType, mode> texRef, float x, float dx,
- float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture<float1, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture<float2, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture<float4, texType, mode> texRef, float x,
- float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DGrad(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float dx,
- float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture<char, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture<char1, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture<char2, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture<char4, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture<unsigned char, texType, mode> texRef,
- float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture<uchar1, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture<uchar2, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture<uchar4, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture<short, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture<short1, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture<short2, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture<short4, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture<unsigned short, texType, mode> texRef,
- float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture<ushort1, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture<ushort2, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture<ushort4, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture<int, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture<int1, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture<int2, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture<int4, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture<unsigned int, texType, mode> texRef, float x,
- float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture<uint1, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture<uint2, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture<uint4, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture<float, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture<float1, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture<float2, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture<float4, texType, mode> texRef, float x, float y) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture<char, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture<char1, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture<char2, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture<char4, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture<unsigned char, texType, mode> texRef,
- float x, float y, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture<uchar1, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture<uchar2, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture<uchar4, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture<short, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture<short1, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture<short2, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture<short4, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture<unsigned short, texType, mode> texRef,
- float x, float y, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture<ushort1, texType, mode> texRef, float x,
- float y, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture<ushort2, texType, mode> texRef, float x,
- float y, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture<ushort4, texType, mode> texRef, float x,
- float y, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture<int, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture<int1, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture<int2, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture<int4, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture<unsigned int, texType, mode> texRef,
- float x, float y, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture<uint1, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture<uint2, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture<uint4, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture<float, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture<float1, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture<float2, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture<float4, texType, mode> texRef, float x, float y,
- float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture<char, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture<char1, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture<char2, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture<char4, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture<unsigned char, texType, mode> texRef,
- float x, float y, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture<uchar1, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture<uchar2, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture<uchar4, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture<short, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture<short1, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture<short2, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture<short4, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture<unsigned short, texType, mode> texRef,
- float x, float y, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture<ushort1, texType, mode> texRef, float x,
- float y, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture<ushort2, texType, mode> texRef, float x,
- float y, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture<ushort4, texType, mode> texRef, float x,
- float y, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture<int, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture<int1, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture<int2, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture<int4, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture<unsigned int, texType, mode> texRef,
- float x, float y, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture<uint1, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture<uint2, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture<uint4, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture<float, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture<float1, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture<float2, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture<float4, texType, mode> texRef, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture<char, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture<char1, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture<char2, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture<char4, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture<unsigned char, texType, mode> texRef,
- float x, float y, float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3D(texture<uchar1, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture<uchar2, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture<uchar4, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture<short, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture<short1, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture<short2, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture<short4, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture<unsigned short, texType, mode> texRef,
- float x, float y, float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture<ushort1, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture<ushort2, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture<ushort4, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture<int, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture<int1, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture<int2, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture<int4, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture<unsigned int, texType, mode> texRef, float x,
- float y, float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture<uint1, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture<uint2, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture<uint4, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture<float, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture<float1, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture<float2, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture<float4, texType, mode> texRef, float x, float y,
- float z) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3D(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y, float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture<char, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture<char1, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture<char2, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture<char4, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture<unsigned char, texType, mode> texRef,
- float x, float y, float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture<uchar1, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture<uchar2, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture<uchar4, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture<int, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture<int1, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture<int2, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture<int4, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture<unsigned int, texType, mode> texRef,
- float x, float y, float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture<uint1, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture<uint2, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture<uint4, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture<float, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture<float1, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture<float2, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture<float4, texType, mode> texRef, float x, float y,
- float z, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y, float z,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
- level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DGrad(texture<char, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture<char1, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture<char2, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture<char4, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture<unsigned char, texType, mode> texRef,
- float x, float y, float z, float4 dx,
- float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture<uchar1, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture<uchar2, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture<uchar4, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture<short, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture<short1, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture<short2, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture<short4, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture<unsigned short, texType, mode> texRef,
- float x, float y, float z, float4 dx,
- float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture<ushort1, texType, mode> texRef, float x,
- float y, float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture<ushort2, texType, mode> texRef, float x,
- float y, float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture<ushort4, texType, mode> texRef, float x,
- float y, float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture<int, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture<int1, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture<int2, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture<int4, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture<unsigned int, texType, mode> texRef,
- float x, float y, float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture<uint1, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture<uint2, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture<uint4, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture<float, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture<float1, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture<float2, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture<float4, texType, mode> texRef, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DGrad(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture<unsigned short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- float z, float4 dx, float4 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
- float4(dx.x, dx.y, dx.z, dx.w).data,
- float4(dy.x, dy.y, dy.z, dy.w).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture<char, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture<char1, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture<char2, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture<char4, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture<unsigned char, texType, mode> texRef,
- float x, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture<uchar1, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture<uchar2, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture<uchar4, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture<short, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture<short1, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture<short2, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture<short4, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered(
- texture<unsigned short, texType, mode> texRef, float x, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture<ushort1, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture<ushort2, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture<ushort4, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture<int, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture<int1, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture<int2, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture<int4, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture<unsigned int, texType, mode> texRef,
- float x, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture<uint1, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture<uint2, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture<uint4, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture<float, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture<float1, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture<float2, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture<float4, texType, mode> texRef, float x,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered(
- texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture<char, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture<char1, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture<char2, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture<char4, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod(
- texture<unsigned char, texType, mode> texRef, float x, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture<uchar1, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture<uchar2, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture<uchar4, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture<short, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture<short1, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture<short2, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture<short4, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod(
- texture<unsigned short, texType, mode> texRef, float x, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture<ushort1, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture<ushort2, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture<ushort4, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture<int, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture<int1, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture<int2, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture<int4, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture<unsigned int, texType, mode> texRef,
- float x, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture<uint1, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture<uint2, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture<uint4, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture<float, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture<float1, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture<float2, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture<float4, texType, mode> texRef, float x,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod(
- texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod(
- texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, int layer,
- float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture<char, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture<char1, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture<char2, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture<char4, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad(
- texture<unsigned char, texType, mode> texRef, float x, int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad(
- texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture<uchar1, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture<uchar2, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture<uchar4, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture<short, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture<short1, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture<short2, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture<short4, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad(
- texture<unsigned short, texType, mode> texRef, float x, int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad(
- texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture<ushort1, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture<ushort2, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture<ushort4, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture<int, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture<int1, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture<int2, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture<int4, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad(
- texture<unsigned int, texType, mode> texRef, float x, int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad(
- texture<unsigned int, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture<uint1, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture<uint2, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture<uint4, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture<float, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture<float1, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture<float2, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture<float4, texType, mode> texRef, float x,
- int layer, float dx, float dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- int layer, float dx, float dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture<char, texType, mode> texRef, float x, float y,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture<char1, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture<char2, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture<char4, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture<unsigned char, texType, mode> texRef,
- float x, float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture<unsigned char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture<uchar1, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture<uchar2, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture<uchar4, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture<short, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayered(texture<short1, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayered(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture<short2, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture<short4, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered(
- texture<unsigned short, texType, mode> texRef, float x, float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered(
- texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture<ushort1, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture<ushort2, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture<ushort4, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture<int, texType, mode> texRef, float x, float y,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture<int1, texType, mode> texRef, float x, float y,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture<int2, texType, mode> texRef, float x, float y,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture<int4, texType, mode> texRef, float x, float y,
- int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture<unsigned int, texType, mode> texRef,
- float x, float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture<uint1, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture<uint2, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture<uint4, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture<float, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture<float1, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture<float2, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture<float4, texType, mode> texRef, float x,
- float y, int layer) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture<char, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture<char1, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture<char2, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture<char4, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod(
- texture<unsigned char, texType, mode> texRef, float x, float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod(
- texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture<uchar1, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture<uchar2, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture<uchar4, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture<short, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture<short1, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture<short2, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture<short4, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod(
- texture<unsigned short, texType, mode> texRef, float x, float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod(
- texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture<ushort1, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture<ushort2, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture<ushort4, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture<int, texType, mode> texRef, float x, float y,
- int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture<int1, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture<int2, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture<int4, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture<unsigned int, texType, mode> texRef,
- float x, float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture<unsigned int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture<uint1, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture<uint2, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture<uint4, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture<float, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture<float1, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture<float2, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture<float4, texType, mode> texRef, float x,
- float y, int layer, float level) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float level) {
- TEXTURE_PARAMETERS_INIT;
- texel.f = __ockl_image_sample_lod_2Da(
- i, s, float4(x, y, layer, 0.0f).data, level);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture<char, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture<char, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture<char1, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture<char1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture<char2, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture<char2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture<char4, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture<char4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad(
- texture<unsigned char, texType, mode> texRef, float x, float y, int layer, float2 dx,
- float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad(
- texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture<uchar1, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture<uchar1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture<uchar2, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture<uchar2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture<uchar4, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture<uchar4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture<short, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture<short, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture<short1, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture<short1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture<short2, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture<short2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture<short4, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture<short4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad(
- texture<unsigned short, texType, mode> texRef, float x, float y, int layer, float2 dx,
- float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad(
- texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture<ushort1, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture<ushort1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture<ushort2, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture<ushort2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture<ushort4, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture<ushort4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredGrad(texture<int, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredGrad(texture<int, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture<int1, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture<int1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture<int2, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture<int2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture<int4, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture<int4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x, float y,
- int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad(
- texture<unsigned int, texType, mode> texRef, float x, float y, int layer, float2 dx,
- float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad(
- texture<unsigned int, texType, mode> texRef, hipTextureObject_t textureObject, float x, float y,
- int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture<uint1, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture<uint1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture<uint2, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture<uint2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture<uint4, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture<uint4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture<float, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture<float, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture<float1, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture<float1, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture<float2, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture<float2, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture<float4, texType, mode> texRef, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_REF_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture<float4, texType, mode> texRef,
- hipTextureObject_t textureObject, float x,
- float y, int layer, float2 dx, float2 dy) {
- TEXTURE_PARAMETERS_INIT;
- texel.f =
- __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
- float2(dx.x, dx.y).data,
- float2(dy.x, dy.y).data);
- TEXTURE_RETURN_FLOAT_XYZW;
-}
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_indirect_functions.h b/third_party/rocm/include/hip/hcc_detail/texture_indirect_functions.h
deleted file mode 100644
index 2fe33f3..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_indirect_functions.h
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#if defined(__cplusplus)
-
-#include <hip/hip_vector_types.h>
-#include <hip/hip_texture_types.h>
-#include <hip/hcc_detail/ockl_image.h>
-
-#include <type_traits>
-
-#define TEXTURE_OBJECT_PARAMETERS_INIT \
- unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \
- unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;
-
-template<typename T>
-struct __hip_is_itex_channel_type
-{
- static constexpr bool value =
- std::is_same<T, char>::value ||
- std::is_same<T, unsigned char>::value ||
- std::is_same<T, short>::value ||
- std::is_same<T, unsigned short>::value ||
- std::is_same<T, int>::value ||
- std::is_same<T, unsigned int>::value ||
- std::is_same<T, float>::value;
-};
-
-template<
- typename T,
- unsigned int rank>
-struct __hip_is_itex_channel_type<HIP_vector_type<T, rank>>
-{
- static constexpr bool value =
- __hip_is_itex_channel_type<T>::value &&
- ((rank == 1) ||
- (rank == 2) ||
- (rank == 4));
-};
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1Dfetch(hipTextureObject_t textureObject, int x)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_load_1Db(i, x);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x)
-{
- *ptr = tex1Dfetch<T>(textureObject, x);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1D(hipTextureObject_t textureObject, float x)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_1D(i, s, x);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x)
-{
- *ptr = tex1D<T>(textureObject, x);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2D(hipTextureObject_t textureObject, float x, float y)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y)
-{
- *ptr = tex2D<T>(textureObject, x, y);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
-{
- *ptr = tex3D<T>(textureObject, x, y, z);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer)
-{
- *ptr = tex1DLayered<T>(textureObject, x, layer);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer)
-{
- *ptr = tex1DLayered<T>(textureObject, x, y, layer);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
-{
- *ptr = texCubemap<T>(textureObject, x, y, z);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer)
-{
- *ptr = texCubemapLayered<T>(textureObject, x, y, z, layer);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- switch (comp) {
- case 1: {
- auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<T*>(&tmp);
- break;
- }
- case 2: {
- auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<T*>(&tmp);
- break;
- }
- case 3: {
- auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<T*>(&tmp);
- break;
- }
- default: {
- auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
- return *reinterpret_cast<T*>(&tmp);
- break;
- }
- };
- return {};
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0)
-{
- *ptr = texCubemapLayered<T>(textureObject, x, y, comp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLod(hipTextureObject_t textureObject, float x, float level)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_lod_1D(i, s, x, level);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level)
-{
- *ptr = tex1DLod<T>(textureObject, x, level);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float level)
-{
- *ptr = tex2DLod<T>(textureObject, x, y, level);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
- *ptr = tex3DLod<T>(textureObject, x, y, z, level);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level)
-{
- *ptr = tex1DLayeredLod<T>(textureObject, x, layer, level);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level)
-{
- *ptr = tex2DLayeredLod<T>(textureObject, x, y, layer, level);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
- *ptr = texCubemapLod<T>(textureObject, x, y, z, level);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return *reinterpret_cast<T*>(&tmp);
- return {};
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
- *ptr = texCubemapGrad<T>(textureObject, x, y, z, dPdx, dPdy);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
-{
- *ptr = texCubemapLayeredLod<T>(textureObject, x, y, z, layer, level);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
-{
- *ptr = tex1DGrad<T>(textureObject, x, dPdx, dPdy);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
-{
- *ptr = tex2DGrad<T>(textureObject, x, y, dPdx, dPdy);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
- *ptr = tex3DGrad<T>(textureObject, x, y, z, dPdx, dPdy);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
-{
- *ptr = tex1DLayeredGrad<T>(textureObject, x, layer, dPdx, dPdy);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
- return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
-{
- *ptr = tex2DLayeredGrad<T>(textureObject, x, y, layer, dPdx, dPdy);
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
-{
- TEXTURE_OBJECT_PARAMETERS_INIT
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return *reinterpret_cast<T*>(&tmp);
- return {};
-}
-
-template <
- typename T,
- typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
-{
- *ptr = texCubemapLayeredGrad<T>(textureObject, x, y, z, layer, dPdx, dPdy);
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_types.h b/third_party/rocm/include/hip/hcc_detail/texture_types.h
deleted file mode 100644
index 832b909..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_types.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H
-
-#include <hip/hcc_detail/driver_types.h>
-
-#define hipTextureType1D 0x01
-#define hipTextureType2D 0x02
-#define hipTextureType3D 0x03
-#define hipTextureTypeCubemap 0x0C
-#define hipTextureType1DLayered 0xF1
-#define hipTextureType2DLayered 0xF2
-#define hipTextureTypeCubemapLayered 0xFC
-
-/**
- * Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD
- */
-#define HIP_IMAGE_OBJECT_SIZE_DWORD 12
-#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8
-#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD
-#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD)
-
-/**
- * An opaque value that represents a hip texture object
- */
-struct __hip_texture;
-typedef struct __hip_texture* hipTextureObject_t;
-
-/**
- * hip texture address modes
- */
-enum hipTextureAddressMode {
- hipAddressModeWrap = 0,
- hipAddressModeClamp = 1,
- hipAddressModeMirror = 2,
- hipAddressModeBorder = 3
-};
-
-/**
- * hip texture filter modes
- */
-enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1 };
-
-/**
- * hip texture read modes
- */
-enum hipTextureReadMode { hipReadModeElementType = 0, hipReadModeNormalizedFloat = 1 };
-
-/**
- * hip texture reference
- */
-typedef struct textureReference {
- int normalized;
- enum hipTextureReadMode readMode;// used only for driver API's
- enum hipTextureFilterMode filterMode;
- enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
- struct hipChannelFormatDesc channelDesc;
- int sRGB; // Perform sRGB->linear conversion during texture read
- unsigned int maxAnisotropy; // Limit to the anisotropy ratio
- enum hipTextureFilterMode mipmapFilterMode;
- float mipmapLevelBias;
- float minMipmapLevelClamp;
- float maxMipmapLevelClamp;
-
- hipTextureObject_t textureObject;
- int numChannels;
- enum hipArray_Format format;
-}textureReference;
-
-/**
- * hip texture descriptor
- */
-typedef struct hipTextureDesc {
- enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
- enum hipTextureFilterMode filterMode;
- enum hipTextureReadMode readMode;
- int sRGB; // Perform sRGB->linear conversion during texture read
- float borderColor[4];
- int normalizedCoords;
- unsigned int maxAnisotropy;
- enum hipTextureFilterMode mipmapFilterMode;
- float mipmapLevelBias;
- float minMipmapLevelClamp;
- float maxMipmapLevelClamp;
-}hipTextureDesc;
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_bfloat16.h b/third_party/rocm/include/hip/hip_bfloat16.h
deleted file mode 100644
index ef09cf0..0000000
--- a/third_party/rocm/include/hip/hip_bfloat16.h
+++ /dev/null
@@ -1,280 +0,0 @@
-/**
- * MIT License
- *
- * Copyright 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*!\file
- * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef
- */
-
-#ifndef _HIP_BFLOAT16_H_
-#define _HIP_BFLOAT16_H_
-
-#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
-
-// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only
-// include a minimal definition of hip_bfloat16
-
-#include <stdint.h>
-/*! \brief Struct to represent a 16 bit brain floating point number. */
-typedef struct
-{
- uint16_t data;
-} hip_bfloat16;
-
-#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
-
-#include <cmath>
-#include <cstddef>
-#include <cstdint>
-#include <hip/hip_runtime.h>
-#include <ostream>
-#include <type_traits>
-
-struct hip_bfloat16
-{
- uint16_t data;
-
- enum truncate_t
- {
- truncate
- };
-
- __host__ __device__ hip_bfloat16() = default;
-
- // round upper 16 bits of IEEE float to convert to bfloat16
- explicit __host__ __device__ hip_bfloat16(float f)
- : data(float_to_bfloat16(f))
- {
- }
-
- explicit __host__ __device__ hip_bfloat16(float f, truncate_t)
- : data(truncate_float_to_bfloat16(f))
- {
- }
-
- // zero extend lower 16 bits of bfloat16 to convert to IEEE float
- __host__ __device__ operator float() const
- {
- union
- {
- uint32_t int32;
- float fp32;
- } u = {uint32_t(data) << 16};
- return u.fp32;
- }
-
- static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f)
- {
- hip_bfloat16 output;
- output.data = float_to_bfloat16(f);
- return output;
- }
-
- static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f, truncate_t)
- {
- hip_bfloat16 output;
- output.data = truncate_float_to_bfloat16(f);
- return output;
- }
-
-private:
- static __host__ __device__ uint16_t float_to_bfloat16(float f)
- {
- union
- {
- float fp32;
- uint32_t int32;
- } u = {f};
- if(~u.int32 & 0x7f800000)
- {
- // When the exponent bits are not all 1s, then the value is zero, normal,
- // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
- // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
- // This causes the bfloat16's mantissa to be incremented by 1 if the 16
- // least significant bits of the float mantissa are greater than 0x8000,
- // or if they are equal to 0x8000 and the least significant bit of the
- // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
- // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
- // has the value 0x7f, then incrementing it causes it to become 0x00 and
- // the exponent is incremented by one, which is the next higher FP value
- // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
- // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
- // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
- // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
- // incrementing it causes it to become an exponent of 0xFF and a mantissa
- // of 0x00, which is Inf, the next higher value to the unrounded value.
- u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even
- }
- else if(u.int32 & 0xffff)
- {
- // When all of the exponent bits are 1, the value is Inf or NaN.
- // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
- // mantissa bit. Quiet NaN is indicated by the most significant mantissa
- // bit being 1. Signaling NaN is indicated by the most significant
- // mantissa bit being 0 but some other bit(s) being 1. If any of the
- // lower 16 bits of the mantissa are 1, we set the least significant bit
- // of the bfloat16 mantissa, in order to preserve signaling NaN in case
- // the bloat16's mantissa bits are all 0.
- u.int32 |= 0x10000; // Preserve signaling NaN
- }
- return uint16_t(u.int32 >> 16);
- }
-
- // Truncate instead of rounding, preserving SNaN
- static __host__ __device__ uint16_t truncate_float_to_bfloat16(float f)
- {
- union
- {
- float fp32;
- uint32_t int32;
- } u = {f};
- return uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff));
- }
-};
-
-typedef struct
-{
- uint16_t data;
-} hip_bfloat16_public;
-
-static_assert(std::is_standard_layout<hip_bfloat16>{},
- "hip_bfloat16 is not a standard layout type, and thus is "
- "incompatible with C.");
-
-static_assert(std::is_trivial<hip_bfloat16>{},
- "hip_bfloat16 is not a trivial type, and thus is "
- "incompatible with C.");
-
-static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public)
- && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data),
- "internal hip_bfloat16 does not match public hip_bfloat16");
-
-inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16)
-{
- return os << float(bf16);
-}
-inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a)
-{
- return a;
-}
-inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a)
-{
- a.data ^= 0x8000;
- return a;
-}
-inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b)
-{
- return hip_bfloat16(float(a) + float(b));
-}
-inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b)
-{
- return hip_bfloat16(float(a) - float(b));
-}
-inline __host__ __device__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b)
-{
- return hip_bfloat16(float(a) * float(b));
-}
-inline __host__ __device__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b)
-{
- return hip_bfloat16(float(a) / float(b));
-}
-inline __host__ __device__ bool operator<(hip_bfloat16 a, hip_bfloat16 b)
-{
- return float(a) < float(b);
-}
-inline __host__ __device__ bool operator==(hip_bfloat16 a, hip_bfloat16 b)
-{
- return float(a) == float(b);
-}
-inline __host__ __device__ bool operator>(hip_bfloat16 a, hip_bfloat16 b)
-{
- return b < a;
-}
-inline __host__ __device__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b)
-{
- return !(a > b);
-}
-inline __host__ __device__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b)
-{
- return !(a == b);
-}
-inline __host__ __device__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b)
-{
- return !(a < b);
-}
-inline __host__ __device__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b)
-{
- return a = a + b;
-}
-inline __host__ __device__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b)
-{
- return a = a - b;
-}
-inline __host__ __device__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b)
-{
- return a = a * b;
-}
-inline __host__ __device__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b)
-{
- return a = a / b;
-}
-inline __host__ __device__ hip_bfloat16& operator++(hip_bfloat16& a)
-{
- return a += hip_bfloat16(1.0f);
-}
-inline __host__ __device__ hip_bfloat16& operator--(hip_bfloat16& a)
-{
- return a -= hip_bfloat16(1.0f);
-}
-inline __host__ __device__ hip_bfloat16 operator++(hip_bfloat16& a, int)
-{
- hip_bfloat16 orig = a;
- ++a;
- return orig;
-}
-inline __host__ __device__ hip_bfloat16 operator--(hip_bfloat16& a, int)
-{
- hip_bfloat16 orig = a;
- --a;
- return orig;
-}
-
-namespace std
-{
- constexpr __host__ __device__ bool isinf(hip_bfloat16 a)
- {
- return !(~a.data & 0x7f80) && !(a.data & 0x7f);
- }
- constexpr __host__ __device__ bool isnan(hip_bfloat16 a)
- {
- return !(~a.data & 0x7f80) && +(a.data & 0x7f);
- }
- constexpr __host__ __device__ bool iszero(hip_bfloat16 a)
- {
- return !(a.data & 0x7fff);
- }
-}
-
-#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
-
-#endif // _HIP_BFLOAT16_H_
diff --git a/third_party/rocm/include/hip/hip_common.h b/third_party/rocm/include/hip/hip_common.h
deleted file mode 100644
index 79c787b..0000000
--- a/third_party/rocm/include/hip/hip_common.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H
-#define HIP_INCLUDE_HIP_HIP_COMMON_H
-
-// Common code included at start of every hip file.
-// Auto enable __HIP_PLATFORM_HCC__ if compiling with HCC
-// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly
-#if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
-#define __HIP_PLATFORM_HCC__
-#endif //__HCC__
-
-// Auto enable __HIP_PLATFORM_NVCC__ if compiling with NVCC
-#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__))
-#define __HIP_PLATFORM_NVCC__
-#ifdef __CUDACC__
-#define __HIPCC__
-#endif
-
-#endif //__NVCC__
-
-// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path
-#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \
- (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0)
-#define __HIP_DEVICE_COMPILE__ 1
-#endif
-
-#ifdef __GNUC__
-#define HIP_PUBLIC_API __attribute__ ((visibility ("default")))
-#define HIP_INTERNAL_EXPORTED_API __attribute__ ((visibility ("default")))
-#else
-#define HIP_PUBLIC_API
-#define HIP_INTERNAL_EXPORTED_API
-#endif
-
-#if __HIP_DEVICE_COMPILE__ == 0
-// 32-bit Atomics
-#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0)
-#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0)
-#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0)
-#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0)
-#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
-
-// 64-bit Atomics
-#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0)
-#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
-
-// Doubles
-#define __HIP_ARCH_HAS_DOUBLES__ (0)
-
-// Warp cross-lane operations
-#define __HIP_ARCH_HAS_WARP_VOTE__ (0)
-#define __HIP_ARCH_HAS_WARP_BALLOT__ (0)
-#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0)
-#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
-
-// Sync
-#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
-#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
-
-// Misc
-#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
-#define __HIP_ARCH_HAS_3DGRID__ (0)
-#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_complex.h b/third_party/rocm/include/hip/hip_complex.h
deleted file mode 100644
index fb9cad5..0000000
--- a/third_party/rocm/include/hip/hip_complex.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H
-#define HIP_INCLUDE_HIP_HIP_COMPLEX_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_complex.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_complex.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_cooperative_groups.h b/third_party/rocm/include/hip/hip_cooperative_groups.h
deleted file mode 100644
index 41f3637..0000000
--- a/third_party/rocm/include/hip/hip_cooperative_groups.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hip_cooperative_groups.h
- *
- * @brief Defines new types and device API wrappers for `Cooperative Group`
- * feature.
- */
-
-#ifndef HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H
-#define HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H
-
-#include <hip/hip_version.h>
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#if __cplusplus && defined(__clang__) && defined(__HIP__)
-#include <hip/hcc_detail/hip_cooperative_groups.h>
-#endif
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_cooperative_groups.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif // HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H
diff --git a/third_party/rocm/include/hip/hip_ext.h b/third_party/rocm/include/hip/hip_ext.h
deleted file mode 100644
index ef8f53b..0000000
--- a/third_party/rocm/include/hip/hip_ext.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_EXT_H
-#define HIP_INCLUDE_HIP_HIP_EXT_H
-#include "hip/hip_runtime.h"
-#if defined(__cplusplus)
-#include <tuple>
-#include <type_traits>
-#endif
-/** @addtogroup Module Module Management
- * @{
- */
-
-/**
- * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
- to kernelparams or extra
- *
- * @param [in[ f Kernel to launch.
- * @param [in] gridDimX X grid dimension specified in work-items
- * @param [in] gridDimY Y grid dimension specified in work-items
- * @param [in] gridDimZ Z grid dimension specified in work-items
- * @param [in] blockDimX X block dimensions specified in work-items
- * @param [in] blockDimY Y grid dimension specified in work-items
- * @param [in] blockDimZ Z grid dimension specified in work-items
- * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The
- kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th
- default stream is used with associated synchronization rules.
- * @param [in] kernelParams
- * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and
- must be in the memory layout and alignment expected by the kernel.
- * @param [in] startEvent If non-null, specified event will be updated to track the start time of
- the kernel launch. The event must be created before calling this API.
- * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of
- the kernel launch. The event must be created before calling this API.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please
- refer to hip_porting_driver_api.md for sample usage.
- * HIP/ROCm actually updates the start event when the associated kernel completes.
- */
-HIP_PUBLIC_API
-hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
- uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
- uint32_t localWorkSizeX, uint32_t localWorkSizeY,
- uint32_t localWorkSizeZ, size_t sharedMemBytes,
- hipStream_t hStream, void** kernelParams, void** extra,
- hipEvent_t startEvent = nullptr,
- hipEvent_t stopEvent = nullptr,
- uint32_t flags = 0);
-
-HIP_PUBLIC_API
-hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
- uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
- uint32_t localWorkSizeX, uint32_t localWorkSizeY,
- uint32_t localWorkSizeZ, size_t sharedMemBytes,
- hipStream_t hStream, void** kernelParams, void** extra,
- hipEvent_t startEvent = nullptr,
- hipEvent_t stopEvent = nullptr)
- __attribute__((deprecated("use hipExtModuleLaunchKernel instead")));
-
-#if defined(__HIP_ROCclr__) && defined(__cplusplus)
-
-extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks,
- dim3 dimBlocks, void** args, size_t sharedMemBytes,
- hipStream_t stream, hipEvent_t startEvent,
- hipEvent_t stopEvent, int flags);
-
-template <typename... Args, typename F = void (*)(Args...)>
-inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
- std::uint32_t sharedMemBytes, hipStream_t stream,
- hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
- Args... args) {
- constexpr size_t count = sizeof...(Args);
- auto tup_ = std::tuple<Args...>{args...};
- auto tup = validateArgsCountType(kernel, tup_);
- void* _Args[count];
- pArgs<0>(tup, _Args);
-
- auto k = reinterpret_cast<void*>(kernel);
- hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent,
- stopEvent, (int)flags);
-}
-#elif defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
-//kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp"
-
-namespace hip_impl {
-inline
-__attribute__((visibility("hidden")))
-void hipExtLaunchKernelGGLImpl(
- std::uintptr_t function_address,
- const dim3& numBlocks,
- const dim3& dimBlocks,
- std::uint32_t sharedMemBytes,
- hipStream_t stream,
- hipEvent_t startEvent,
- hipEvent_t stopEvent,
- std::uint32_t flags,
- void** kernarg) {
-
- const auto& kd = hip_impl::get_program_state()
- .kernel_descriptor(function_address, target_agent(stream));
-
- hipExtModuleLaunchKernel(kd, numBlocks.x * dimBlocks.x,
- numBlocks.y * dimBlocks.y,
- numBlocks.z * dimBlocks.z,
- dimBlocks.x, dimBlocks.y, dimBlocks.z,
- sharedMemBytes, stream, nullptr, kernarg,
- startEvent, stopEvent, flags);
-}
-} // namespace hip_impl
-
-template <typename... Args, typename F = void (*)(Args...)>
-inline
-void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks,
- const dim3& dimBlocks, std::uint32_t sharedMemBytes,
- hipStream_t stream, hipEvent_t startEvent,
- hipEvent_t stopEvent, std::uint32_t flags,
- Args... args) {
- hip_impl::hip_init();
- auto kernarg =
- hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
- std::size_t kernarg_size = kernarg.size();
-
- void* config[]{
- HIP_LAUNCH_PARAM_BUFFER_POINTER,
- kernarg.data(),
- HIP_LAUNCH_PARAM_BUFFER_SIZE,
- &kernarg_size,
- HIP_LAUNCH_PARAM_END};
-
- hip_impl::hipExtLaunchKernelGGLImpl(reinterpret_cast<std::uintptr_t>(kernel),
- numBlocks, dimBlocks, sharedMemBytes,
- stream, startEvent, stopEvent, flags,
- &config[0]);
-}
-#endif // !__HIP_ROCclr__ && defined(__cplusplus)
-
-// doxygen end AMD-specific features
-/**
- * @}
- */
-#endif // #iidef HIP_INCLUDE_HIP_HIP_EXT_H
diff --git a/third_party/rocm/include/hip/hip_fp16.h b/third_party/rocm/include/hip/hip_fp16.h
deleted file mode 100644
index 994ce62..0000000
--- a/third_party/rocm/include/hip/hip_fp16.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_FP16_H
-#define HIP_INCLUDE_HIP_HIP_FP16_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_fp16.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "cuda_fp16.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_hcc.h b/third_party/rocm/include/hip/hip_hcc.h
deleted file mode 100644
index e7e27fc..0000000
--- a/third_party/rocm/include/hip/hip_hcc.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_HCC_H
-#define HIP_INCLUDE_HIP_HIP_HCC_H
-#warning "hip/hip_hcc.h is deprecated, please use hip/hip_ext.h"
-#include "hip/hip_ext.h"
-#endif // #ifdef HIP_INCLUDE_HIP_HIP_HCC_H
diff --git a/third_party/rocm/include/hip/hip_profile.h b/third_party/rocm/include/hip/hip_profile.h
deleted file mode 100644
index ff18239..0000000
--- a/third_party/rocm/include/hip/hip_profile.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H
-#define HIP_INCLUDE_HIP_HIP_PROFILE_H
-
-#define HIP_SCOPED_MARKER(markerName, group)
-#define HIP_BEGIN_MARKER(markerName, group)
-#define HIP_END_MARKER()
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_runtime.h b/third_party/rocm/include/hip/hip_runtime.h
deleted file mode 100644
index c785f8d..0000000
--- a/third_party/rocm/include/hip/hip_runtime.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-//! HIP = Heterogeneous-compute Interface for Portability
-//!
-//! Define a extremely thin runtime layer that allows source code to be compiled unmodified
-//! through either AMD HCC or NVCC. Key features tend to be in the spirit
-//! and terminology of CUDA, but with a portable path to other accelerators as well:
-//
-//! Both paths support rich C++ features including classes, templates, lambdas, etc.
-//! Runtime API is C
-//! Memory management is based on pure pointers and resembles malloc/free/copy.
-//
-//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch
-//! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++
-//! features.
-
-#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H
-#define HIP_INCLUDE_HIP_HIP_RUNTIME_H
-
-#if (__gfx1010__ || __gfx1011__ || __gfx1012__ || __gfx1030__ || __gfx1031__) && __AMDGCN_WAVEFRONT_SIZE == 64
-#error HIP is not supported on GFX10 with wavefront size 64
-#endif
-
-// Some standard header files, these are included by hc.hpp and so want to make them avail on both
-// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
-// on NVCC path:
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#if __cplusplus > 199711L
-#include <thread>
-#endif
-
-#include <hip/hip_version.h>
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_runtime.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_runtime.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-
-#include <hip/hip_runtime_api.h>
-#include <hip/hip_vector_types.h>
-#include <hip/library_types.h>
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_runtime_api.h b/third_party/rocm/include/hip/hip_runtime_api.h
deleted file mode 100644
index ed9a288..0000000
--- a/third_party/rocm/include/hip/hip_runtime_api.h
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hip_runtime_api.h
- *
- * @brief Defines the API signatures for HIP runtime.
- * This file can be compiled with a standard compiler.
- */
-
-#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
-#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
-
-
-#include <string.h> // for getDeviceProp
-#include <hip/hip_version.h>
-#include <hip/hip_common.h>
-
-enum {
- HIP_SUCCESS = 0,
- HIP_ERROR_INVALID_VALUE,
- HIP_ERROR_NOT_INITIALIZED,
- HIP_ERROR_LAUNCH_OUT_OF_RESOURCES
-};
-
-typedef struct {
- // 32-bit Atomics
- unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory.
- unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory.
- unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory.
- unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory.
- unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory.
-
- // 64-bit Atomics
- unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory.
- unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory.
-
- // Doubles
- unsigned hasDoubles : 1; ///< Double-precision floating point.
-
- // Warp cross-lane operations
- unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all).
- unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot).
- unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*).
- unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps.
-
- // Sync
- unsigned hasThreadFenceSystem : 1; ///< __threadfence_system.
- unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or.
-
- // Misc
- unsigned hasSurfaceFuncs : 1; ///< Surface functions.
- unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D).
- unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism.
-} hipDeviceArch_t;
-
-
-//---
-// Common headers for both NVCC and HCC paths:
-
-/**
- * hipDeviceProp
- *
- */
-typedef struct hipDeviceProp_t {
- char name[256]; ///< Device name.
- size_t totalGlobalMem; ///< Size of global memory region (in bytes).
- size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
- int regsPerBlock; ///< Registers per block.
- int warpSize; ///< Warp size.
- int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
- int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
- int maxGridSize[3]; ///< Max grid dimensions (XYZ).
- int clockRate; ///< Max clock frequency of the multiProcessors in khz.
- int memoryClockRate; ///< Max global memory clock frequency in khz.
- int memoryBusWidth; ///< Global memory bus width in bits.
- size_t totalConstMem; ///< Size of shared memory region (in bytes).
- int major; ///< Major compute capability. On HCC, this is an approximation and features may
- ///< differ from CUDA CC. See the arch feature flags for portable ways to query
- ///< feature caps.
- int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
- ///< differ from CUDA CC. See the arch feature flags for portable ways to query
- ///< feature caps.
- int multiProcessorCount; ///< Number of multi-processors (compute units).
- int l2CacheSize; ///< L2 cache size.
- int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
- int computeMode; ///< Compute mode.
- int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
- ///< instructions. New for HIP.
- hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
- int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
- int pciDomainID; ///< PCI Domain ID
- int pciBusID; ///< PCI Bus ID.
- int pciDeviceID; ///< PCI Device ID.
- size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
- int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
- int canMapHostMemory; ///< Check whether HIP can map host memory
- int gcnArch; ///< DEPRECATED: use gcnArchName instead
- char gcnArchName[256]; ///< AMD GCN Arch Name.
- int integrated; ///< APU vs dGPU
- int cooperativeLaunch; ///< HIP device supports cooperative launch
- int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
- int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
- int maxTexture1D; ///< Maximum number of elements in 1D images
- int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
- int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
- unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
- unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
- size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies
- size_t textureAlignment; ///<Alignment requirement for textures
- size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory
- int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
- int ECCEnabled; ///<Device has ECC support enabled
- int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
- int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple
- ///devices with unmatched functions
- int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple
- ///devices with unmatched grid dimensions
- int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple
- ///devices with unmatched block dimensions
- int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple
- ///devices with unmatched shared memories
- int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
- int asicRevision; ///< Revision of the GPU in this device
- int managedMemory; ///< Device supports allocating managed memory on this system
- int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
- int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with the CPU
- int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
- ///< without calling hipHostRegister on it
- int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
-} hipDeviceProp_t;
-
-
-/**
- * Memory type (for pointer attributes)
- */
-typedef enum hipMemoryType {
- hipMemoryTypeHost, ///< Memory is physically located on host
- hipMemoryTypeDevice, ///< Memory is physically located on device. (see deviceId for specific
- ///< device)
- hipMemoryTypeArray, ///< Array memory, physically located on device. (see deviceId for specific
- ///< device)
- hipMemoryTypeUnified ///< Not used currently
-}hipMemoryType;
-
-
-/**
- * Pointer attributes
- */
-typedef struct hipPointerAttribute_t {
- enum hipMemoryType memoryType;
- int device;
- void* devicePointer;
- void* hostPointer;
- int isManaged;
- unsigned allocationFlags; /* flags specified when memory was allocated*/
- /* peers? */
-} hipPointerAttribute_t;
-
-
-// hack to get these to show up in Doxygen:
-/**
- * @defgroup GlobalDefs Global enum and defines
- * @{
- *
- */
-
-// Ignoring error-code return values from hip APIs is discouraged. On C++17,
-// we can make that yield a warning
-#if __cplusplus >= 201703L
-#define __HIP_NODISCARD [[nodiscard]]
-#else
-#define __HIP_NODISCARD
-#endif
-
-/*
- * @brief hipError_t
- * @enum
- * @ingroup Enumerations
- */
-// Developer note - when updating these, update the hipErrorName and hipErrorString functions in
-// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path.
-
-typedef enum __HIP_NODISCARD hipError_t {
- hipSuccess = 0, ///< Successful completion.
- hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL
- ///< or not in an acceptable range.
- hipErrorOutOfMemory = 2,
- // Deprecated
- hipErrorMemoryAllocation = 2, ///< Memory allocation error.
- hipErrorNotInitialized = 3,
- // Deprecated
- hipErrorInitializationError = 3,
- hipErrorDeinitialized = 4,
- hipErrorProfilerDisabled = 5,
- hipErrorProfilerNotInitialized = 6,
- hipErrorProfilerAlreadyStarted = 7,
- hipErrorProfilerAlreadyStopped = 8,
- hipErrorInvalidConfiguration = 9,
- hipErrorInvalidSymbol = 13,
- hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer
- hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction
- hipErrorInsufficientDriver = 35,
- hipErrorMissingConfiguration = 52,
- hipErrorPriorLaunchFailure = 53,
- hipErrorInvalidDeviceFunction = 98,
- hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices
- hipErrorInvalidDevice = 101, ///< DeviceID must be in range 0...#compute-devices.
- hipErrorInvalidImage = 200,
- hipErrorInvalidContext = 201, ///< Produced when input context is invalid.
- hipErrorContextAlreadyCurrent = 202,
- hipErrorMapFailed = 205,
- // Deprecated
- hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr.
- hipErrorUnmapFailed = 206,
- hipErrorArrayIsMapped = 207,
- hipErrorAlreadyMapped = 208,
- hipErrorNoBinaryForGpu = 209,
- hipErrorAlreadyAcquired = 210,
- hipErrorNotMapped = 211,
- hipErrorNotMappedAsArray = 212,
- hipErrorNotMappedAsPointer = 213,
- hipErrorECCNotCorrectable = 214,
- hipErrorUnsupportedLimit = 215,
- hipErrorContextAlreadyInUse = 216,
- hipErrorPeerAccessUnsupported = 217,
- hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
- hipErrorInvalidGraphicsContext = 219,
- hipErrorInvalidSource = 300,
- hipErrorFileNotFound = 301,
- hipErrorSharedObjectSymbolNotFound = 302,
- hipErrorSharedObjectInitFailed = 303,
- hipErrorOperatingSystem = 304,
- hipErrorInvalidHandle = 400,
- // Deprecated
- hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid.
- hipErrorNotFound = 500,
- hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not
- ///< ready. This is not actually an error, but is used to distinguish
- ///< from hipSuccess (which indicates completion). APIs that return
- ///< this error include hipEventQuery and hipStreamQuery.
- hipErrorIllegalAddress = 700,
- hipErrorLaunchOutOfResources = 701, ///< Out of resources error.
- hipErrorLaunchTimeOut = 702,
- hipErrorPeerAccessAlreadyEnabled =
- 704, ///< Peer access was already enabled from the current device.
- hipErrorPeerAccessNotEnabled =
- 705, ///< Peer access was never enabled from the current device.
- hipErrorSetOnActiveProcess = 708,
- hipErrorAssert = 710, ///< Produced when the kernel calls assert.
- hipErrorHostMemoryAlreadyRegistered =
- 712, ///< Produced when trying to lock a page-locked memory.
- hipErrorHostMemoryNotRegistered =
- 713, ///< Produced when trying to unlock a non-page-locked memory.
- hipErrorLaunchFailure =
- 719, ///< An exception occurred on the device while executing a kernel.
- hipErrorCooperativeLaunchTooLarge =
- 720, ///< This error indicates that the number of blocks launched per grid for a kernel
- ///< that was launched via cooperative launch APIs exceeds the maximum number of
- ///< allowed blocks for the current device
- hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented
- hipErrorUnknown = 999, //< Unknown error.
- // HSA Runtime Error Codes start here.
- hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen
- ///< in production systems.
- hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically
- ///< not seen in production systems.
- hipErrorTbd ///< Marker that more error codes are needed.
-} hipError_t;
-
-#undef __HIP_NODISCARD
-
-/*
- * @brief hipDeviceAttribute_t
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipDeviceAttribute_t {
- hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
- hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block.
- hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block.
- hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block.
- hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid.
- hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid.
- hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid.
- hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in
- ///< bytes.
- hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
- hipDeviceAttributeWarpSize, ///< Warp size in threads.
- hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a
- ///< thread block. This number is shared by all thread
- ///< blocks simultaneously resident on a
- ///< multiprocessor.
- hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
- hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
- hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
- hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
- hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
- hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2
- ///< cache.
- hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per
- ///< multiprocessor.
- hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
- hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
- hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels
- ///< concurrently.
- hipDeviceAttributePciBusId, ///< PCI Bus ID.
- hipDeviceAttributePciDeviceId, ///< PCI Device ID.
- hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per
- ///< Multiprocessor.
- hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
- hipDeviceAttributeIntegrated, ///< iGPU
- hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
- hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
- hipDeviceAttributeMaxTexture1DWidth, ///< Maximum number of elements in 1D images
- hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D images in image elements
- hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D images in image elements
- hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D images in image elements
- hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimensions height of 3D images in image elements
- hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimensions depth of 3D images in image elements
-
- hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
- hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
-
- hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
- hipDeviceAttributeTextureAlignment, ///<Alignment requirement for textures
- hipDeviceAttributeTexturePitchAlignment, ///<Pitch alignment requirement for 2D texture references bound to pitched memory;
- hipDeviceAttributeKernelExecTimeout, ///<Run time limit for kernels executed on the device
- hipDeviceAttributeCanMapHostMemory, ///<Device can map host memory into device address space
- hipDeviceAttributeEccEnabled, ///<Device has ECC support enabled
-
- hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
- ///devices with unmatched functions
- hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
- ///devices with unmatched grid dimensions
- hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
- ///devices with unmatched block dimensions
- hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
- ///devices with unmatched shared memories
- hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
- hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
- hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
- /// the device without migration
- hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory
- /// concurrently with the CPU
- hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
- /// without calling hipHostRegister on it
- hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via
- /// the host's page tables
-} hipDeviceAttribute_t;
-
-enum hipComputeMode {
- hipComputeModeDefault = 0,
- hipComputeModeExclusive = 1,
- hipComputeModeProhibited = 2,
- hipComputeModeExclusiveProcess = 3
-};
-
-/**
- * @}
- */
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include "hip/hcc_detail/hip_runtime_api.h"
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "hip/nvcc_detail/hip_runtime_api.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-
-/**
- * @brief: C++ wrapper for hipMalloc
- *
- * Perform automatic type conversion to eliminate need for excessive typecasting (ie void**)
- *
- * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these
- * wrappers. It is useful for applications which need to obtain decltypes of
- * HIP runtime APIs.
- *
- * @see hipMalloc
- */
-#if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__)
-template <class T>
-static inline hipError_t hipMalloc(T** devPtr, size_t size) {
- return hipMalloc((void**)devPtr, size);
-}
-
-// Provide an override to automatically typecast the pointer type from void**, and also provide a
-// default for the flags.
-template <class T>
-static inline hipError_t hipHostMalloc(T** ptr, size_t size,
- unsigned int flags = hipHostMallocDefault) {
- return hipHostMalloc((void**)ptr, size, flags);
-}
-
-template <class T>
-static inline hipError_t hipMallocManaged(T** devPtr, size_t size,
- unsigned int flags = hipMemAttachGlobal) {
- return hipMallocManaged((void**)devPtr, size, flags);
-}
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_texture_types.h b/third_party/rocm/include/hip/hip_texture_types.h
deleted file mode 100644
index a7feab0..0000000
--- a/third_party/rocm/include/hip/hip_texture_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_texture_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_texture_types.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_vector_types.h b/third_party/rocm/include/hip/hip_vector_types.h
deleted file mode 100644
index c1a0373..0000000
--- a/third_party/rocm/include/hip/hip_vector_types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-//! hip_vector_types.h : Defines the HIP vector types.
-
-#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H
-#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H
-
-#include <hip/hip_common.h>
-
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#if __cplusplus
-#include <hip/hcc_detail/hip_vector_types.h>
-#endif
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <vector_types.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_version.h b/third_party/rocm/include/hip/hip_version.h
deleted file mode 100644
index 2fdb247..0000000
--- a/third_party/rocm/include/hip/hip_version.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Auto-generated by cmake
-
-#ifndef HIP_VERSION_H
-#define HIP_VERSION_H
-
-#define HIP_VERSION_MAJOR 4
-#define HIP_VERSION_MINOR 1
-#define HIP_VERSION_PATCH 21114
-#define HIP_VERSION (HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR)
-
-#define __HIP_HAS_GET_PCH 1
-
-#endif
-
diff --git a/third_party/rocm/include/hip/hiprtc.h b/third_party/rocm/include/hip/hiprtc.h
deleted file mode 100644
index 22d78d2..0000000
--- a/third_party/rocm/include/hip/hiprtc.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#pragma once
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
- #include <hip/hcc_detail/hiprtc.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
- #include <hip/nvcc_detail/nvrtc.h>
-#else
- #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/library_types.h b/third_party/rocm/include/hip/library_types.h
deleted file mode 100644
index 4a988df..0000000
--- a/third_party/rocm/include/hip/library_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H
-#define HIP_INCLUDE_HIP_LIBRARY_TYPES_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/library_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "library_types.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/math_functions.h b/third_party/rocm/include/hip/math_functions.h
deleted file mode 100644
index 2dfec45..0000000
--- a/third_party/rocm/include/hip/math_functions.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
-
-// Some standard header files, these are included by hc.hpp and so want to make them avail on both
-// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
-// on NVCC path:
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/math_functions.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-//#include <hip/nvcc_detail/math_functions.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/channel_descriptor.h b/third_party/rocm/include/hip/nvcc_detail/channel_descriptor.h
deleted file mode 100644
index c3e9dc1..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/channel_descriptor.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H
-
-#include "channel_descriptor.h"
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_complex.h b/third_party/rocm/include/hip/nvcc_detail/hip_complex.h
deleted file mode 100644
index d0e45d2..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_complex.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
-
-#include "cuComplex.h"
-
-typedef cuFloatComplex hipFloatComplex;
-
-__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return cuCrealf(z); }
-
-__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return cuCimagf(z); }
-
-__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
- return make_cuFloatComplex(a, b);
-}
-
-__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { return cuConjf(z); }
-
-__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) {
- return cuCabsf(z) * cuCabsf(z);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
- return cuCaddf(p, q);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
- return cuCsubf(p, q);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
- return cuCmulf(p, q);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
- return cuCdivf(p, q);
-}
-
-__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return cuCabsf(z); }
-
-typedef cuDoubleComplex hipDoubleComplex;
-
-__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return cuCreal(z); }
-
-__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return cuCimag(z); }
-
-__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
- return make_cuDoubleComplex(a, b);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { return cuConj(z); }
-
-__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) {
- return cuCabs(z) * cuCabs(z);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
- return cuCadd(p, q);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
- return cuCsub(p, q);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
- return cuCmul(p, q);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
- return cuCdiv(p, q);
-}
-
-__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return cuCabs(z); }
-
-typedef cuFloatComplex hipComplex;
-
-__device__ __host__ static inline hipComplex make_Complex(float x, float y) {
- return make_cuComplex(x, y);
-}
-
-__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
- return cuComplexDoubleToFloat(z);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
- return cuComplexFloatToDouble(z);
-}
-
-__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
- return cuCfmaf(p, q, r);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
- hipDoubleComplex r) {
- return cuCfma(p, q, r);
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_cooperative_groups.h b/third_party/rocm/include/hip/nvcc_detail/hip_cooperative_groups.h
deleted file mode 100644
index 113e600..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_cooperative_groups.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-
-// Include CUDA headers
-#include <cuda_runtime.h>
-#include <cooperative_groups.h>
-
-// Include HIP wrapper headers around CUDA
-#include <hip/hip_runtime.h>
-#include <hip/hip_runtime_api.h>
-
-#endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_runtime.h b/third_party/rocm/include/hip/nvcc_detail/hip_runtime.h
deleted file mode 100644
index e7c3eaf..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_runtime.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
-
-#include <cuda_runtime.h>
-
-#include <hip/hip_runtime_api.h>
-
-#define HIP_KERNEL_NAME(...) __VA_ARGS__
-
-typedef int hipLaunchParm;
-
-#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
- do { \
- kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \
- } while (0)
-
-#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
-
-#define hipReadModeElementType cudaReadModeElementType
-
-#ifdef __CUDA_ARCH__
-
-
-// 32-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (__CUDA_ARCH__ >= 110)
-#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110)
-#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120)
-#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120)
-#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200)
-
-// 64-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (__CUDA_ARCH__ >= 120)
-
-// Doubles
-#define __HIP_ARCH_HAS_DOUBLES__ (__CUDA_ARCH__ >= 120)
-
-// warp cross-lane operations:
-#define __HIP_ARCH_HAS_WARP_VOTE__ (__CUDA_ARCH__ >= 120)
-#define __HIP_ARCH_HAS_WARP_BALLOT__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (__CUDA_ARCH__ >= 300)
-#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (__CUDA_ARCH__ >= 350)
-
-// sync
-#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (__CUDA_ARCH__ >= 200)
-
-// misc
-#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_3DGRID__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (__CUDA_ARCH__ >= 350)
-
-#endif
-
-#ifdef __CUDACC__
-
-
-#define hipThreadIdx_x threadIdx.x
-#define hipThreadIdx_y threadIdx.y
-#define hipThreadIdx_z threadIdx.z
-
-#define hipBlockIdx_x blockIdx.x
-#define hipBlockIdx_y blockIdx.y
-#define hipBlockIdx_z blockIdx.z
-
-#define hipBlockDim_x blockDim.x
-#define hipBlockDim_y blockDim.y
-#define hipBlockDim_z blockDim.z
-
-#define hipGridDim_x gridDim.x
-#define hipGridDim_y gridDim.y
-#define hipGridDim_z gridDim.z
-
-#define HIP_SYMBOL(X) &X
-
-/**
- * extern __shared__
- */
-
-#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];
-
-#define HIP_DYNAMIC_SHARED_ATTRIBUTE
-
-#ifdef __HIP_DEVICE_COMPILE__
-#define abort_() \
- { asm("trap;"); }
-#undef assert
-#define assert(COND) \
- { \
- if (!COND) { \
- abort_(); \
- } \
- }
-#endif
-
-#define __clock() clock()
-#define __clock64() clock64()
-
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_runtime_api.h b/third_party/rocm/include/hip/nvcc_detail/hip_runtime_api.h
deleted file mode 100644
index 257d795..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_runtime_api.h
+++ /dev/null
@@ -1,2045 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
-
-#include <cuda_runtime_api.h>
-#include <cuda.h>
-#include <cuda_profiler_api.h>
-#include <cuda_fp16.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __cplusplus
-#define __dparm(x) = x
-#else
-#define __dparm(x)
-#endif
-
-// Add Deprecated Support for CUDA Mapped HIP APIs
-#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
-#define __HIP_DEPRECATED
-#elif defined(_MSC_VER)
-#define __HIP_DEPRECATED __declspec(deprecated)
-#elif defined(__GNUC__)
-#define __HIP_DEPRECATED __attribute__((deprecated))
-#else
-#define __HIP_DEPRECATED
-#endif
-
-
-// TODO -move to include/hip_runtime_api.h as a common implementation.
-/**
- * Memory copy types
- *
- */
-typedef enum hipMemcpyKind {
- hipMemcpyHostToHost,
- hipMemcpyHostToDevice,
- hipMemcpyDeviceToHost,
- hipMemcpyDeviceToDevice,
- hipMemcpyDefault
-} hipMemcpyKind;
-
-// hipDataType
-#define hipDataType cudaDataType
-#define HIP_R_16F CUDA_R_16F
-#define HIP_R_32F CUDA_R_32F
-#define HIP_R_64F CUDA_R_64F
-#define HIP_C_16F CUDA_C_16F
-#define HIP_C_32F CUDA_C_32F
-#define HIP_C_64F CUDA_C_64F
-
-// hipLibraryPropertyType
-#define hipLibraryPropertyType libraryPropertyType
-#define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
-#define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
-#define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
-
-#define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
-
-//hipArray_Format
-#define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8
-#define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16
-#define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32
-#define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8
-#define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16
-#define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32
-#define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF
-#define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT
-
-// hipArray_Format
-#define hipArray_Format CUarray_format
-
-inline static CUarray_format hipArray_FormatToCUarray_format(
- hipArray_Format format) {
- switch (format) {
- case HIP_AD_FORMAT_UNSIGNED_INT8:
- return CU_AD_FORMAT_UNSIGNED_INT8;
- case HIP_AD_FORMAT_UNSIGNED_INT16:
- return CU_AD_FORMAT_UNSIGNED_INT16;
- case HIP_AD_FORMAT_UNSIGNED_INT32:
- return CU_AD_FORMAT_UNSIGNED_INT32;
- case HIP_AD_FORMAT_SIGNED_INT8:
- return CU_AD_FORMAT_SIGNED_INT8;
- case HIP_AD_FORMAT_SIGNED_INT16:
- return CU_AD_FORMAT_SIGNED_INT16;
- case HIP_AD_FORMAT_SIGNED_INT32:
- return CU_AD_FORMAT_SIGNED_INT32;
- case HIP_AD_FORMAT_HALF:
- return CU_AD_FORMAT_HALF;
- case HIP_AD_FORMAT_FLOAT:
- return CU_AD_FORMAT_FLOAT;
- default:
- return CU_AD_FORMAT_UNSIGNED_INT8;
- }
-}
-
-#define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP
-#define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP
-#define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
-#define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
-
-// hipAddress_mode
-#define hipAddress_mode CUaddress_mode
-
-inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
- hipAddress_mode mode) {
- switch (mode) {
- case HIP_TR_ADDRESS_MODE_WRAP:
- return CU_TR_ADDRESS_MODE_WRAP;
- case HIP_TR_ADDRESS_MODE_CLAMP:
- return CU_TR_ADDRESS_MODE_CLAMP;
- case HIP_TR_ADDRESS_MODE_MIRROR:
- return CU_TR_ADDRESS_MODE_MIRROR;
- case HIP_TR_ADDRESS_MODE_BORDER:
- return CU_TR_ADDRESS_MODE_BORDER;
- default:
- return CU_TR_ADDRESS_MODE_WRAP;
- }
-}
-
-#define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT
-#define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR
-
-// hipFilter_mode
-#define hipFilter_mode CUfilter_mode
-
-inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
- hipFilter_mode mode) {
- switch (mode) {
- case HIP_TR_FILTER_MODE_POINT:
- return CU_TR_FILTER_MODE_POINT;
- case HIP_TR_FILTER_MODE_LINEAR:
- return CU_TR_FILTER_MODE_LINEAR;
- default:
- return CU_TR_FILTER_MODE_POINT;
- }
-}
-
-//hipResourcetype
-#define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY
-#define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
-#define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR
-#define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D
-
-// hipResourcetype
-#define hipResourcetype CUresourcetype
-
-inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
- hipResourcetype resType) {
- switch (resType) {
- case HIP_RESOURCE_TYPE_ARRAY:
- return CU_RESOURCE_TYPE_ARRAY;
- case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
- return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
- case HIP_RESOURCE_TYPE_LINEAR:
- return CU_RESOURCE_TYPE_LINEAR;
- case HIP_RESOURCE_TYPE_PITCH2D:
- return CU_RESOURCE_TYPE_PITCH2D;
- default:
- return CU_RESOURCE_TYPE_ARRAY;
- }
-}
-
-#define hipTexRef CUtexref
-#define hiparray CUarray
-
-// hipTextureAddressMode
-typedef enum cudaTextureAddressMode hipTextureAddressMode;
-#define hipAddressModeWrap cudaAddressModeWrap
-#define hipAddressModeClamp cudaAddressModeClamp
-#define hipAddressModeMirror cudaAddressModeMirror
-#define hipAddressModeBorder cudaAddressModeBorder
-
-// hipTextureFilterMode
-typedef enum cudaTextureFilterMode hipTextureFilterMode;
-#define hipFilterModePoint cudaFilterModePoint
-#define hipFilterModeLinear cudaFilterModeLinear
-
-// hipTextureReadMode
-typedef enum cudaTextureReadMode hipTextureReadMode;
-#define hipReadModeElementType cudaReadModeElementType
-#define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
-
-// hipChannelFormatKind
-typedef enum cudaChannelFormatKind hipChannelFormatKind;
-#define hipChannelFormatKindSigned cudaChannelFormatKindSigned
-#define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned
-#define hipChannelFormatKindFloat cudaChannelFormatKindFloat
-#define hipChannelFormatKindNone cudaChannelFormatKindNone
-
-#define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
-#define hipBoundaryModeZero cudaBoundaryModeZero
-#define hipBoundaryModeTrap cudaBoundaryModeTrap
-#define hipBoundaryModeClamp cudaBoundaryModeClamp
-
-// hipFuncCache
-#define hipFuncCachePreferNone cudaFuncCachePreferNone
-#define hipFuncCachePreferShared cudaFuncCachePreferShared
-#define hipFuncCachePreferL1 cudaFuncCachePreferL1
-#define hipFuncCachePreferEqual cudaFuncCachePreferEqual
-
-// hipResourceType
-#define hipResourceType cudaResourceType
-#define hipResourceTypeArray cudaResourceTypeArray
-#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
-#define hipResourceTypeLinear cudaResourceTypeLinear
-#define hipResourceTypePitch2D cudaResourceTypePitch2D
-//
-// hipErrorNoDevice.
-
-
-//! Flags that can be used with hipEventCreateWithFlags:
-#define hipEventDefault cudaEventDefault
-#define hipEventBlockingSync cudaEventBlockingSync
-#define hipEventDisableTiming cudaEventDisableTiming
-#define hipEventInterprocess cudaEventInterprocess
-#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
-#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
-
-
-#define hipHostMallocDefault cudaHostAllocDefault
-#define hipHostMallocPortable cudaHostAllocPortable
-#define hipHostMallocMapped cudaHostAllocMapped
-#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
-#define hipHostMallocCoherent 0x0
-#define hipHostMallocNonCoherent 0x0
-
-#define hipMemAttachGlobal cudaMemAttachGlobal
-#define hipMemAttachHost cudaMemAttachHost
-
-#define hipHostRegisterDefault cudaHostRegisterDefault
-#define hipHostRegisterPortable cudaHostRegisterPortable
-#define hipHostRegisterMapped cudaHostRegisterMapped
-#define hipHostRegisterIoMemory cudaHostRegisterIoMemory
-
-#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
-#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
-#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
-#define hipLimitMallocHeapSize cudaLimitMallocHeapSize
-#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
-
-#define hipOccupancyDefault cudaOccupancyDefault
-
-#define hipCooperativeLaunchMultiDeviceNoPreSync \
- cudaCooperativeLaunchMultiDeviceNoPreSync
-#define hipCooperativeLaunchMultiDeviceNoPostSync \
- cudaCooperativeLaunchMultiDeviceNoPostSync
-
-
-// enum CUjit_option redefines
-#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
-#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
-#define hipJitOptionWallTime CU_JIT_WALL_TIME
-#define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
-#define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
-#define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
-#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
-#define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
-#define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
-#define hipJitOptionTarget CU_JIT_TARGET
-#define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
-#define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
-#define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
-#define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
-#define hipJitOptionCacheMode CU_JIT_CACHE_MODE
-#define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
-#define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
-#define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
-
-typedef cudaEvent_t hipEvent_t;
-typedef cudaStream_t hipStream_t;
-typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
-typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
-typedef enum cudaLimit hipLimit_t;
-typedef enum cudaFuncAttribute hipFuncAttribute;
-typedef enum cudaFuncCache hipFuncCache_t;
-typedef CUcontext hipCtx_t;
-typedef enum cudaSharedMemConfig hipSharedMemConfig;
-typedef CUfunc_cache hipFuncCache;
-typedef CUjit_option hipJitOption;
-typedef CUdevice hipDevice_t;
-typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
-#define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
-#define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
-#define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
-#define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
-#define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize
-#define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout
-
-typedef CUmodule hipModule_t;
-typedef CUfunction hipFunction_t;
-typedef CUdeviceptr hipDeviceptr_t;
-typedef struct cudaArray hipArray;
-typedef struct cudaArray* hipArray_t;
-typedef struct cudaArray* hipArray_const_t;
-typedef struct cudaFuncAttributes hipFuncAttributes;
-typedef struct cudaLaunchParams hipLaunchParams;
-#define hipFunction_attribute CUfunction_attribute
-#define hip_Memcpy2D CUDA_MEMCPY2D
-#define hipMemcpy3DParms cudaMemcpy3DParms
-#define hipArrayDefault cudaArrayDefault
-#define hipArrayLayered cudaArrayLayered
-#define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
-#define hipArrayCubemap cudaArrayCubemap
-#define hipArrayTextureGather cudaArrayTextureGather
-
-typedef cudaTextureObject_t hipTextureObject_t;
-typedef cudaSurfaceObject_t hipSurfaceObject_t;
-#define hipTextureType1D cudaTextureType1D
-#define hipTextureType1DLayered cudaTextureType1DLayered
-#define hipTextureType2D cudaTextureType2D
-#define hipTextureType2DLayered cudaTextureType2DLayered
-#define hipTextureType3D cudaTextureType3D
-#define hipDeviceMapHost cudaDeviceMapHost
-
-typedef struct cudaExtent hipExtent;
-typedef struct cudaPitchedPtr hipPitchedPtr;
-#define make_hipExtent make_cudaExtent
-#define make_hipPos make_cudaPos
-#define make_hipPitchedPtr make_cudaPitchedPtr
-// Flags that can be used with hipStreamCreateWithFlags
-#define hipStreamDefault cudaStreamDefault
-#define hipStreamNonBlocking cudaStreamNonBlocking
-
-typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
-typedef struct cudaResourceDesc hipResourceDesc;
-typedef struct cudaTextureDesc hipTextureDesc;
-typedef struct cudaResourceViewDesc hipResourceViewDesc;
-// adding code for hipmemSharedConfig
-#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
-#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
-#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
-
-//Function Attributes
-#define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
-#define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
-#define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
-#define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
-#define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
-#define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
-#define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
-
-#if CUDA_VERSION >= 9000
-#define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__)
-#define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__)
-#define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
-#define __shfl_xor(...) __shfl_xor_sync(0xffffffff, __VA_ARGS__)
-#endif // CUDA_VERSION >= 9000
-
-inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
- switch (cuError) {
- case cudaSuccess:
- return hipSuccess;
- case cudaErrorProfilerDisabled:
- return hipErrorProfilerDisabled;
- case cudaErrorProfilerNotInitialized:
- return hipErrorProfilerNotInitialized;
- case cudaErrorProfilerAlreadyStarted:
- return hipErrorProfilerAlreadyStarted;
- case cudaErrorProfilerAlreadyStopped:
- return hipErrorProfilerAlreadyStopped;
- case cudaErrorInsufficientDriver:
- return hipErrorInsufficientDriver;
- case cudaErrorUnsupportedLimit:
- return hipErrorUnsupportedLimit;
- case cudaErrorPeerAccessUnsupported:
- return hipErrorPeerAccessUnsupported;
- case cudaErrorInvalidGraphicsContext:
- return hipErrorInvalidGraphicsContext;
- case cudaErrorSharedObjectSymbolNotFound:
- return hipErrorSharedObjectSymbolNotFound;
- case cudaErrorSharedObjectInitFailed:
- return hipErrorSharedObjectInitFailed;
- case cudaErrorOperatingSystem:
- return hipErrorOperatingSystem;
- case cudaErrorSetOnActiveProcess:
- return hipErrorSetOnActiveProcess;
- case cudaErrorIllegalAddress:
- return hipErrorIllegalAddress;
- case cudaErrorInvalidSymbol:
- return hipErrorInvalidSymbol;
- case cudaErrorMissingConfiguration:
- return hipErrorMissingConfiguration;
- case cudaErrorMemoryAllocation:
- return hipErrorOutOfMemory;
- case cudaErrorInitializationError:
- return hipErrorNotInitialized;
- case cudaErrorLaunchFailure:
- return hipErrorLaunchFailure;
- case cudaErrorCooperativeLaunchTooLarge:
- return hipErrorCooperativeLaunchTooLarge;
- case cudaErrorPriorLaunchFailure:
- return hipErrorPriorLaunchFailure;
- case cudaErrorLaunchOutOfResources:
- return hipErrorLaunchOutOfResources;
- case cudaErrorInvalidDeviceFunction:
- return hipErrorInvalidDeviceFunction;
- case cudaErrorInvalidConfiguration:
- return hipErrorInvalidConfiguration;
- case cudaErrorInvalidDevice:
- return hipErrorInvalidDevice;
- case cudaErrorInvalidValue:
- return hipErrorInvalidValue;
- case cudaErrorInvalidDevicePointer:
- return hipErrorInvalidDevicePointer;
- case cudaErrorInvalidMemcpyDirection:
- return hipErrorInvalidMemcpyDirection;
- case cudaErrorInvalidResourceHandle:
- return hipErrorInvalidHandle;
- case cudaErrorNotReady:
- return hipErrorNotReady;
- case cudaErrorNoDevice:
- return hipErrorNoDevice;
- case cudaErrorPeerAccessAlreadyEnabled:
- return hipErrorPeerAccessAlreadyEnabled;
- case cudaErrorPeerAccessNotEnabled:
- return hipErrorPeerAccessNotEnabled;
- case cudaErrorHostMemoryAlreadyRegistered:
- return hipErrorHostMemoryAlreadyRegistered;
- case cudaErrorHostMemoryNotRegistered:
- return hipErrorHostMemoryNotRegistered;
- case cudaErrorMapBufferObjectFailed:
- return hipErrorMapFailed;
- case cudaErrorAssert:
- return hipErrorAssert;
- case cudaErrorNotSupported:
- return hipErrorNotSupported;
- case cudaErrorCudartUnloading:
- return hipErrorDeinitialized;
- case cudaErrorInvalidKernelImage:
- return hipErrorInvalidImage;
- case cudaErrorUnmapBufferObjectFailed:
- return hipErrorUnmapFailed;
- case cudaErrorNoKernelImageForDevice:
- return hipErrorNoBinaryForGpu;
- case cudaErrorECCUncorrectable:
- return hipErrorECCNotCorrectable;
- case cudaErrorDeviceAlreadyInUse:
- return hipErrorContextAlreadyInUse;
- case cudaErrorInvalidPtx:
- return hipErrorInvalidKernelFile;
- case cudaErrorLaunchTimeout:
- return hipErrorLaunchTimeOut;
-#if CUDA_VERSION >= 10010
- case cudaErrorInvalidSource:
- return hipErrorInvalidSource;
- case cudaErrorFileNotFound:
- return hipErrorFileNotFound;
- case cudaErrorSymbolNotFound:
- return hipErrorNotFound;
- case cudaErrorArrayIsMapped:
- return hipErrorArrayIsMapped;
- case cudaErrorNotMappedAsPointer:
- return hipErrorNotMappedAsPointer;
- case cudaErrorNotMappedAsArray:
- return hipErrorNotMappedAsArray;
- case cudaErrorNotMapped:
- return hipErrorNotMapped;
- case cudaErrorAlreadyAcquired:
- return hipErrorAlreadyAcquired;
- case cudaErrorAlreadyMapped:
- return hipErrorAlreadyMapped;
-#endif
-#if CUDA_VERSION >= 10020
- case cudaErrorDeviceUninitialized:
- return hipErrorInvalidContext;
-#endif
- case cudaErrorUnknown:
- default:
- return hipErrorUnknown; // Note - translated error.
- }
-}
-
-inline static hipError_t hipCUResultTohipError(CUresult cuError) {
- switch (cuError) {
- case CUDA_SUCCESS:
- return hipSuccess;
- case CUDA_ERROR_OUT_OF_MEMORY:
- return hipErrorOutOfMemory;
- case CUDA_ERROR_INVALID_VALUE:
- return hipErrorInvalidValue;
- case CUDA_ERROR_INVALID_DEVICE:
- return hipErrorInvalidDevice;
- case CUDA_ERROR_DEINITIALIZED:
- return hipErrorDeinitialized;
- case CUDA_ERROR_NO_DEVICE:
- return hipErrorNoDevice;
- case CUDA_ERROR_INVALID_CONTEXT:
- return hipErrorInvalidContext;
- case CUDA_ERROR_NOT_INITIALIZED:
- return hipErrorNotInitialized;
- case CUDA_ERROR_INVALID_HANDLE:
- return hipErrorInvalidHandle;
- case CUDA_ERROR_MAP_FAILED:
- return hipErrorMapFailed;
- case CUDA_ERROR_PROFILER_DISABLED:
- return hipErrorProfilerDisabled;
- case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
- return hipErrorProfilerNotInitialized;
- case CUDA_ERROR_PROFILER_ALREADY_STARTED:
- return hipErrorProfilerAlreadyStarted;
- case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
- return hipErrorProfilerAlreadyStopped;
- case CUDA_ERROR_INVALID_IMAGE:
- return hipErrorInvalidImage;
- case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
- return hipErrorContextAlreadyCurrent;
- case CUDA_ERROR_UNMAP_FAILED:
- return hipErrorUnmapFailed;
- case CUDA_ERROR_ARRAY_IS_MAPPED:
- return hipErrorArrayIsMapped;
- case CUDA_ERROR_ALREADY_MAPPED:
- return hipErrorAlreadyMapped;
- case CUDA_ERROR_NO_BINARY_FOR_GPU:
- return hipErrorNoBinaryForGpu;
- case CUDA_ERROR_ALREADY_ACQUIRED:
- return hipErrorAlreadyAcquired;
- case CUDA_ERROR_NOT_MAPPED:
- return hipErrorNotMapped;
- case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
- return hipErrorNotMappedAsArray;
- case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
- return hipErrorNotMappedAsPointer;
- case CUDA_ERROR_ECC_UNCORRECTABLE:
- return hipErrorECCNotCorrectable;
- case CUDA_ERROR_UNSUPPORTED_LIMIT:
- return hipErrorUnsupportedLimit;
- case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
- return hipErrorContextAlreadyInUse;
- case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
- return hipErrorPeerAccessUnsupported;
- case CUDA_ERROR_INVALID_PTX:
- return hipErrorInvalidKernelFile;
- case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
- return hipErrorInvalidGraphicsContext;
- case CUDA_ERROR_INVALID_SOURCE:
- return hipErrorInvalidSource;
- case CUDA_ERROR_FILE_NOT_FOUND:
- return hipErrorFileNotFound;
- case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
- return hipErrorSharedObjectSymbolNotFound;
- case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
- return hipErrorSharedObjectInitFailed;
- case CUDA_ERROR_OPERATING_SYSTEM:
- return hipErrorOperatingSystem;
- case CUDA_ERROR_NOT_FOUND:
- return hipErrorNotFound;
- case CUDA_ERROR_NOT_READY:
- return hipErrorNotReady;
- case CUDA_ERROR_ILLEGAL_ADDRESS:
- return hipErrorIllegalAddress;
- case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
- return hipErrorLaunchOutOfResources;
- case CUDA_ERROR_LAUNCH_TIMEOUT:
- return hipErrorLaunchTimeOut;
- case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
- return hipErrorPeerAccessAlreadyEnabled;
- case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
- return hipErrorPeerAccessNotEnabled;
- case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
- return hipErrorSetOnActiveProcess;
- case CUDA_ERROR_ASSERT:
- return hipErrorAssert;
- case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
- return hipErrorHostMemoryAlreadyRegistered;
- case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
- return hipErrorHostMemoryNotRegistered;
- case CUDA_ERROR_LAUNCH_FAILED:
- return hipErrorLaunchFailure;
- case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
- return hipErrorCooperativeLaunchTooLarge;
- case CUDA_ERROR_NOT_SUPPORTED:
- return hipErrorNotSupported;
- case CUDA_ERROR_UNKNOWN:
- default:
- return hipErrorUnknown; // Note - translated error.
- }
-}
-
-inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
- switch (hError) {
- case hipSuccess:
- return cudaSuccess;
- case hipErrorOutOfMemory:
- return cudaErrorMemoryAllocation;
- case hipErrorProfilerDisabled:
- return cudaErrorProfilerDisabled;
- case hipErrorProfilerNotInitialized:
- return cudaErrorProfilerNotInitialized;
- case hipErrorProfilerAlreadyStarted:
- return cudaErrorProfilerAlreadyStarted;
- case hipErrorProfilerAlreadyStopped:
- return cudaErrorProfilerAlreadyStopped;
- case hipErrorInvalidConfiguration:
- return cudaErrorInvalidConfiguration;
- case hipErrorLaunchOutOfResources:
- return cudaErrorLaunchOutOfResources;
- case hipErrorInvalidValue:
- return cudaErrorInvalidValue;
- case hipErrorInvalidHandle:
- return cudaErrorInvalidResourceHandle;
- case hipErrorInvalidDevice:
- return cudaErrorInvalidDevice;
- case hipErrorInvalidMemcpyDirection:
- return cudaErrorInvalidMemcpyDirection;
- case hipErrorInvalidDevicePointer:
- return cudaErrorInvalidDevicePointer;
- case hipErrorNotInitialized:
- return cudaErrorInitializationError;
- case hipErrorNoDevice:
- return cudaErrorNoDevice;
- case hipErrorNotReady:
- return cudaErrorNotReady;
- case hipErrorPeerAccessNotEnabled:
- return cudaErrorPeerAccessNotEnabled;
- case hipErrorPeerAccessAlreadyEnabled:
- return cudaErrorPeerAccessAlreadyEnabled;
- case hipErrorHostMemoryAlreadyRegistered:
- return cudaErrorHostMemoryAlreadyRegistered;
- case hipErrorHostMemoryNotRegistered:
- return cudaErrorHostMemoryNotRegistered;
- case hipErrorDeinitialized:
- return cudaErrorCudartUnloading;
- case hipErrorInvalidSymbol:
- return cudaErrorInvalidSymbol;
- case hipErrorInsufficientDriver:
- return cudaErrorInsufficientDriver;
- case hipErrorMissingConfiguration:
- return cudaErrorMissingConfiguration;
- case hipErrorPriorLaunchFailure:
- return cudaErrorPriorLaunchFailure;
- case hipErrorInvalidDeviceFunction:
- return cudaErrorInvalidDeviceFunction;
- case hipErrorInvalidImage:
- return cudaErrorInvalidKernelImage;
- case hipErrorInvalidContext:
-#if CUDA_VERSION >= 10020
- return cudaErrorDeviceUninitialized;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorMapFailed:
- return cudaErrorMapBufferObjectFailed;
- case hipErrorUnmapFailed:
- return cudaErrorUnmapBufferObjectFailed;
- case hipErrorArrayIsMapped:
-#if CUDA_VERSION >= 10010
- return cudaErrorArrayIsMapped;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorAlreadyMapped:
-#if CUDA_VERSION >= 10010
- return cudaErrorAlreadyMapped;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorNoBinaryForGpu:
- return cudaErrorNoKernelImageForDevice;
- case hipErrorAlreadyAcquired:
-#if CUDA_VERSION >= 10010
- return cudaErrorAlreadyAcquired;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorNotMapped:
-#if CUDA_VERSION >= 10010
- return cudaErrorNotMapped;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorNotMappedAsArray:
-#if CUDA_VERSION >= 10010
- return cudaErrorNotMappedAsArray;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorNotMappedAsPointer:
-#if CUDA_VERSION >= 10010
- return cudaErrorNotMappedAsPointer;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorECCNotCorrectable:
- return cudaErrorECCUncorrectable;
- case hipErrorUnsupportedLimit:
- return cudaErrorUnsupportedLimit;
- case hipErrorContextAlreadyInUse:
- return cudaErrorDeviceAlreadyInUse;
- case hipErrorPeerAccessUnsupported:
- return cudaErrorPeerAccessUnsupported;
- case hipErrorInvalidKernelFile:
- return cudaErrorInvalidPtx;
- case hipErrorInvalidGraphicsContext:
- return cudaErrorInvalidGraphicsContext;
- case hipErrorInvalidSource:
-#if CUDA_VERSION >= 10010
- return cudaErrorInvalidSource;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorFileNotFound:
-#if CUDA_VERSION >= 10010
- return cudaErrorFileNotFound;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorSharedObjectSymbolNotFound:
- return cudaErrorSharedObjectSymbolNotFound;
- case hipErrorSharedObjectInitFailed:
- return cudaErrorSharedObjectInitFailed;
- case hipErrorOperatingSystem:
- return cudaErrorOperatingSystem;
- case hipErrorNotFound:
-#if CUDA_VERSION >= 10010
- return cudaErrorSymbolNotFound;
-#else
- return cudaErrorUnknown;
-#endif
- case hipErrorIllegalAddress:
- return cudaErrorIllegalAddress;
- case hipErrorLaunchTimeOut:
- return cudaErrorLaunchTimeout;
- case hipErrorSetOnActiveProcess:
- return cudaErrorSetOnActiveProcess;
- case hipErrorLaunchFailure:
- return cudaErrorLaunchFailure;
- case hipErrorCooperativeLaunchTooLarge:
- return cudaErrorCooperativeLaunchTooLarge;
- case hipErrorNotSupported:
- return cudaErrorNotSupported;
- // HSA: does not exist in CUDA
- case hipErrorRuntimeMemory:
- // HSA: does not exist in CUDA
- case hipErrorRuntimeOther:
- case hipErrorUnknown:
- case hipErrorTbd:
- default:
- return cudaErrorUnknown; // Note - translated error.
- }
-}
-
-inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
- switch (kind) {
- case hipMemcpyHostToHost:
- return cudaMemcpyHostToHost;
- case hipMemcpyHostToDevice:
- return cudaMemcpyHostToDevice;
- case hipMemcpyDeviceToHost:
- return cudaMemcpyDeviceToHost;
- case hipMemcpyDeviceToDevice:
- return cudaMemcpyDeviceToDevice;
- default:
- return cudaMemcpyDefault;
- }
-}
-
-inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
- hipTextureAddressMode kind) {
- switch (kind) {
- case hipAddressModeWrap:
- return cudaAddressModeWrap;
- case hipAddressModeClamp:
- return cudaAddressModeClamp;
- case hipAddressModeMirror:
- return cudaAddressModeMirror;
- case hipAddressModeBorder:
- return cudaAddressModeBorder;
- default:
- return cudaAddressModeWrap;
- }
-}
-
-inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
- hipTextureFilterMode kind) {
- switch (kind) {
- case hipFilterModePoint:
- return cudaFilterModePoint;
- case hipFilterModeLinear:
- return cudaFilterModeLinear;
- default:
- return cudaFilterModePoint;
- }
-}
-
-inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
- switch (kind) {
- case hipReadModeElementType:
- return cudaReadModeElementType;
- case hipReadModeNormalizedFloat:
- return cudaReadModeNormalizedFloat;
- default:
- return cudaReadModeElementType;
- }
-}
-
-inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
- hipChannelFormatKind kind) {
- switch (kind) {
- case hipChannelFormatKindSigned:
- return cudaChannelFormatKindSigned;
- case hipChannelFormatKindUnsigned:
- return cudaChannelFormatKindUnsigned;
- case hipChannelFormatKindFloat:
- return cudaChannelFormatKindFloat;
- case hipChannelFormatKindNone:
- return cudaChannelFormatKindNone;
- default:
- return cudaChannelFormatKindNone;
- }
-}
-
-/**
- * Stream CallBack struct
- */
-#define HIPRT_CB CUDART_CB
-typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
-inline static hipError_t hipInit(unsigned int flags) {
- return hipCUResultTohipError(cuInit(flags));
-}
-
-inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
-
-inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
-
-inline static hipError_t hipPeekAtLastError() {
- return hipCUDAErrorTohipError(cudaPeekAtLastError());
-}
-
-inline static hipError_t hipMalloc(void** ptr, size_t size) {
- return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
-}
-
-inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
- return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
-}
-
-inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
- return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
-}
-
-inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
- return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
-}
-
-inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
-
-inline static hipError_t hipMallocHost(void** ptr, size_t size)
- __attribute__((deprecated("use hipHostMalloc instead")));
-inline static hipError_t hipMallocHost(void** ptr, size_t size) {
- return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
-}
-
-inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
- __attribute__((deprecated("use hipHostMalloc instead")));
-inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
- return hipCUResultTohipError(cuMemAllocHost(ptr, size));
-}
-
-inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
- __attribute__((deprecated("use hipHostMalloc instead")));
-inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
-}
-
-inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
-}
-
-inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
-}
-
-inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
- size_t width, size_t height,
- unsigned int flags __dparm(hipArrayDefault)) {
- return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
-}
-
-inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
- hipExtent extent, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
-}
-
-inline static hipError_t hipFreeArray(hipArray* array) {
- return hipCUDAErrorTohipError(cudaFreeArray(array));
-}
-
-inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
-}
-
-inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
- return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
-}
-
-inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
-}
-
-inline static hipError_t hipHostUnregister(void* ptr) {
- return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
-}
-
-inline static hipError_t hipFreeHost(void* ptr)
- __attribute__((deprecated("use hipHostFree instead")));
-inline static hipError_t hipFreeHost(void* ptr) {
- return hipCUDAErrorTohipError(cudaFreeHost(ptr));
-}
-
-inline static hipError_t hipHostFree(void* ptr) {
- return hipCUDAErrorTohipError(cudaFreeHost(ptr));
-}
-
-inline static hipError_t hipSetDevice(int device) {
- return hipCUDAErrorTohipError(cudaSetDevice(device));
-}
-
-inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
- struct cudaDeviceProp cdprop;
- memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
- cdprop.major = prop->major;
- cdprop.minor = prop->minor;
- cdprop.totalGlobalMem = prop->totalGlobalMem;
- cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
- cdprop.regsPerBlock = prop->regsPerBlock;
- cdprop.warpSize = prop->warpSize;
- cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
- cdprop.clockRate = prop->clockRate;
- cdprop.totalConstMem = prop->totalConstMem;
- cdprop.multiProcessorCount = prop->multiProcessorCount;
- cdprop.l2CacheSize = prop->l2CacheSize;
- cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
- cdprop.computeMode = prop->computeMode;
- cdprop.canMapHostMemory = prop->canMapHostMemory;
- cdprop.memoryClockRate = prop->memoryClockRate;
- cdprop.memoryBusWidth = prop->memoryBusWidth;
- return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
-}
-
-inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
- return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
-}
-
-inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
- return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
-}
-
-inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
- return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
-}
-
-inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
- hipStream_t stream) {
- return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
-}
-
-inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
- hipStream_t stream) {
- return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
-}
-
-inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
- hipStream_t stream) {
- return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
-}
-
-inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
- hipMemcpyKind copyKind) {
- return hipCUDAErrorTohipError(
- cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
-}
-
-
-inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
- size_t sizeBytes, hipMemcpyKind copyKind,
- hipStream_t stream) {
- cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes,
- hipMemcpyKindToCudaMemcpyKind(copyKind),
- stream);
-
- if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
-
- return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
-}
-
-inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
- hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
- return hipCUDAErrorTohipError(
- cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
-}
-
-inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
- size_t offset __dparm(0),
- hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
- return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
- hipMemcpyKindToCudaMemcpyKind(copyType)));
-}
-
-inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
- size_t sizeBytes, size_t offset,
- hipMemcpyKind copyType,
- hipStream_t stream __dparm(0)) {
- return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
- symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
-}
-
-inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
- size_t offset __dparm(0),
- hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
- return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
- hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
- size_t sizeBytes, size_t offset,
- hipMemcpyKind kind,
- hipStream_t stream __dparm(0)) {
- return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
- dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
-}
-
-inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
- return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
-}
-
-inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
- return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
-}
-
-inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
- size_t width, size_t height, hipMemcpyKind kind) {
- return hipCUDAErrorTohipError(
- cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
- return hipCUResultTohipError(cuMemcpy2D(pCopy));
-}
-
-inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
- return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
-}
-
-inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
-{
- return hipCUDAErrorTohipError(cudaMemcpy3D(p));
-}
-
-inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream)
-{
- return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
-}
-
-inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
- size_t width, size_t height, hipMemcpyKind kind,
- hipStream_t stream) {
- return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
- hipMemcpyKindToCudaMemcpyKind(kind), stream));
-}
-
-inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
- const void* src, size_t spitch, size_t width,
- size_t height, hipMemcpyKind kind) {
- return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
- height, hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
- size_t hOffset, const void* src,
- size_t count, hipMemcpyKind kind) {
- return hipCUDAErrorTohipError(
- cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
- size_t wOffset, size_t hOffset,
- size_t count, hipMemcpyKind kind) {
- return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
- hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
- size_t count) {
- return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
-}
-
-inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
- size_t count) {
- return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
-}
-
-inline static hipError_t hipDeviceSynchronize() {
- return hipCUDAErrorTohipError(cudaDeviceSynchronize());
-}
-
-inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
- return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
-}
-
-inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
- return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
-}
-
-inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
- return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
-}
-
-inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
- return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
-}
-
-inline static const char* hipGetErrorString(hipError_t error) {
- return cudaGetErrorString(hipErrorToCudaError(error));
-}
-
-inline static const char* hipGetErrorName(hipError_t error) {
- return cudaGetErrorName(hipErrorToCudaError(error));
-}
-
-inline static hipError_t hipGetDeviceCount(int* count) {
- return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
-}
-
-inline static hipError_t hipGetDevice(int* device) {
- return hipCUDAErrorTohipError(cudaGetDevice(device));
-}
-
-inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
- return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
-}
-
-inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
- return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
-}
-
-inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
- return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
-}
-
-inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
- return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
-}
-
-inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
- unsigned int flags) {
- return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
-}
-
-inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
- return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
-}
-
-inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
- return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
-}
-
-inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
- hipStream_t stream __dparm(0)) {
- return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
-}
-
-inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
- hipStream_t stream __dparm(0)) {
- return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
-}
-
-inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
- return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
-}
-
-inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
- hipStream_t stream __dparm(0)) {
- return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
-}
-
-inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
- return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
-}
-
-inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
- hipStream_t stream __dparm(0)) {
- return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
-}
-
-inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
- return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
-}
-
-inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
- return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
-}
-
-inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){
- return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
-}
-
-inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){
- return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
-}
-
-inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
- struct cudaDeviceProp cdprop;
- cudaError_t cerror;
- cerror = cudaGetDeviceProperties(&cdprop, device);
-
- strncpy(p_prop->name, cdprop.name, 256);
- p_prop->totalGlobalMem = cdprop.totalGlobalMem;
- p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
- p_prop->regsPerBlock = cdprop.regsPerBlock;
- p_prop->warpSize = cdprop.warpSize;
- p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
- for (int i = 0; i < 3; i++) {
- p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
- p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
- }
- p_prop->clockRate = cdprop.clockRate;
- p_prop->memoryClockRate = cdprop.memoryClockRate;
- p_prop->memoryBusWidth = cdprop.memoryBusWidth;
- p_prop->totalConstMem = cdprop.totalConstMem;
- p_prop->major = cdprop.major;
- p_prop->minor = cdprop.minor;
- p_prop->multiProcessorCount = cdprop.multiProcessorCount;
- p_prop->l2CacheSize = cdprop.l2CacheSize;
- p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
- p_prop->computeMode = cdprop.computeMode;
- p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
-
- int ccVers = p_prop->major * 100 + p_prop->minor * 10;
- p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
- p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
- p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
- p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
- p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
- p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
- p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
- p_prop->arch.hasDoubles = (ccVers >= 130);
- p_prop->arch.hasWarpVote = (ccVers >= 120);
- p_prop->arch.hasWarpBallot = (ccVers >= 200);
- p_prop->arch.hasWarpShuffle = (ccVers >= 300);
- p_prop->arch.hasFunnelShift = (ccVers >= 350);
- p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
- p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
- p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
- p_prop->arch.has3dGrid = (ccVers >= 200);
- p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
-
- p_prop->concurrentKernels = cdprop.concurrentKernels;
- p_prop->pciDomainID = cdprop.pciDomainID;
- p_prop->pciBusID = cdprop.pciBusID;
- p_prop->pciDeviceID = cdprop.pciDeviceID;
- p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
- p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
- p_prop->canMapHostMemory = cdprop.canMapHostMemory;
- p_prop->gcnArch = 0; // Not a GCN arch
- p_prop->integrated = cdprop.integrated;
- p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
- p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
- p_prop->cooperativeMultiDeviceUnmatchedFunc = 0;
- p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0;
- p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0;
- p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0;
-
- p_prop->maxTexture1D = cdprop.maxTexture1D;
- p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
- p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
- p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
- p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
- p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
-
- p_prop->memPitch = cdprop.memPitch;
- p_prop->textureAlignment = cdprop.textureAlignment;
- p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
- p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
- p_prop->ECCEnabled = cdprop.ECCEnabled;
- p_prop->tccDriver = cdprop.tccDriver;
-
- return hipCUDAErrorTohipError(cerror);
-}
-
-inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
- enum cudaDeviceAttr cdattr;
- cudaError_t cerror;
-
- switch (attr) {
- case hipDeviceAttributeMaxThreadsPerBlock:
- cdattr = cudaDevAttrMaxThreadsPerBlock;
- break;
- case hipDeviceAttributeMaxBlockDimX:
- cdattr = cudaDevAttrMaxBlockDimX;
- break;
- case hipDeviceAttributeMaxBlockDimY:
- cdattr = cudaDevAttrMaxBlockDimY;
- break;
- case hipDeviceAttributeMaxBlockDimZ:
- cdattr = cudaDevAttrMaxBlockDimZ;
- break;
- case hipDeviceAttributeMaxGridDimX:
- cdattr = cudaDevAttrMaxGridDimX;
- break;
- case hipDeviceAttributeMaxGridDimY:
- cdattr = cudaDevAttrMaxGridDimY;
- break;
- case hipDeviceAttributeMaxGridDimZ:
- cdattr = cudaDevAttrMaxGridDimZ;
- break;
- case hipDeviceAttributeMaxSharedMemoryPerBlock:
- cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
- break;
- case hipDeviceAttributeTotalConstantMemory:
- cdattr = cudaDevAttrTotalConstantMemory;
- break;
- case hipDeviceAttributeWarpSize:
- cdattr = cudaDevAttrWarpSize;
- break;
- case hipDeviceAttributeMaxRegistersPerBlock:
- cdattr = cudaDevAttrMaxRegistersPerBlock;
- break;
- case hipDeviceAttributeClockRate:
- cdattr = cudaDevAttrClockRate;
- break;
- case hipDeviceAttributeMemoryClockRate:
- cdattr = cudaDevAttrMemoryClockRate;
- break;
- case hipDeviceAttributeMemoryBusWidth:
- cdattr = cudaDevAttrGlobalMemoryBusWidth;
- break;
- case hipDeviceAttributeMultiprocessorCount:
- cdattr = cudaDevAttrMultiProcessorCount;
- break;
- case hipDeviceAttributeComputeMode:
- cdattr = cudaDevAttrComputeMode;
- break;
- case hipDeviceAttributeL2CacheSize:
- cdattr = cudaDevAttrL2CacheSize;
- break;
- case hipDeviceAttributeMaxThreadsPerMultiProcessor:
- cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
- break;
- case hipDeviceAttributeComputeCapabilityMajor:
- cdattr = cudaDevAttrComputeCapabilityMajor;
- break;
- case hipDeviceAttributeComputeCapabilityMinor:
- cdattr = cudaDevAttrComputeCapabilityMinor;
- break;
- case hipDeviceAttributeConcurrentKernels:
- cdattr = cudaDevAttrConcurrentKernels;
- break;
- case hipDeviceAttributePciBusId:
- cdattr = cudaDevAttrPciBusId;
- break;
- case hipDeviceAttributePciDeviceId:
- cdattr = cudaDevAttrPciDeviceId;
- break;
- case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
- cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
- break;
- case hipDeviceAttributeIsMultiGpuBoard:
- cdattr = cudaDevAttrIsMultiGpuBoard;
- break;
- case hipDeviceAttributeIntegrated:
- cdattr = cudaDevAttrIntegrated;
- break;
- case hipDeviceAttributeMaxTexture1DWidth:
- cdattr = cudaDevAttrMaxTexture1DWidth;
- break;
- case hipDeviceAttributeMaxTexture2DWidth:
- cdattr = cudaDevAttrMaxTexture2DWidth;
- break;
- case hipDeviceAttributeMaxTexture2DHeight:
- cdattr = cudaDevAttrMaxTexture2DHeight;
- break;
- case hipDeviceAttributeMaxTexture3DWidth:
- cdattr = cudaDevAttrMaxTexture3DWidth;
- break;
- case hipDeviceAttributeMaxTexture3DHeight:
- cdattr = cudaDevAttrMaxTexture3DHeight;
- break;
- case hipDeviceAttributeMaxTexture3DDepth:
- cdattr = cudaDevAttrMaxTexture3DDepth;
- break;
- case hipDeviceAttributeMaxPitch:
- cdattr = cudaDevAttrMaxPitch;
- break;
- case hipDeviceAttributeTextureAlignment:
- cdattr = cudaDevAttrTextureAlignment;
- break;
- case hipDeviceAttributeTexturePitchAlignment:
- cdattr = cudaDevAttrTexturePitchAlignment;
- break;
- case hipDeviceAttributeKernelExecTimeout:
- cdattr = cudaDevAttrKernelExecTimeout;
- break;
- case hipDeviceAttributeCanMapHostMemory:
- cdattr = cudaDevAttrCanMapHostMemory;
- break;
- case hipDeviceAttributeEccEnabled:
- cdattr = cudaDevAttrEccEnabled;
- break;
- case hipDeviceAttributeCooperativeLaunch:
- cdattr = cudaDevAttrCooperativeLaunch;
- break;
- case hipDeviceAttributeCooperativeMultiDeviceLaunch:
- cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
- break;
- default:
- return hipCUDAErrorTohipError(cudaErrorInvalidValue);
- }
-
- cerror = cudaDeviceGetAttribute(pi, cdattr, device);
-
- return hipCUDAErrorTohipError(cerror);
-}
-
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
- const void* func,
- int blockSize,
- size_t dynamicSMemSize) {
- return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
- blockSize, dynamicSMemSize));
-}
-
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
- const void* func,
- int blockSize,
- size_t dynamicSMemSize,
- unsigned int flags) {
- return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
- blockSize, dynamicSMemSize, flags));
-}
-
-inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
- hipFunction_t f,
- int blockSize,
- size_t dynamicSMemSize ){
- return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
- blockSize, dynamicSMemSize));
-}
-
-inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
- hipFunction_t f,
- int blockSize,
- size_t dynamicSMemSize,
- unsigned int flags ) {
- return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
- blockSize, dynamicSMemSize, flags));
-}
-
-//TODO - Match CUoccupancyB2DSize
-inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
- hipFunction_t f, size_t dynSharedMemPerBlk,
- int blockSizeLimit){
- return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
- dynSharedMemPerBlk, blockSizeLimit));
-}
-
-//TODO - Match CUoccupancyB2DSize
-inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
- hipFunction_t f, size_t dynSharedMemPerBlk,
- int blockSizeLimit, unsigned int flags){
- return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
- dynSharedMemPerBlk, blockSizeLimit, flags));
-}
-
-inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
- struct cudaPointerAttributes cPA;
- hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
- if (err == hipSuccess) {
-#if (CUDART_VERSION >= 11000)
- auto memType = cPA.type;
-#else
- unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11
-#endif
- switch (memType) {
- case cudaMemoryTypeDevice:
- attributes->memoryType = hipMemoryTypeDevice;
- break;
- case cudaMemoryTypeHost:
- attributes->memoryType = hipMemoryTypeHost;
- break;
- default:
- return hipErrorUnknown;
- }
- attributes->device = cPA.device;
- attributes->devicePointer = cPA.devicePointer;
- attributes->hostPointer = cPA.hostPointer;
- attributes->isManaged = 0;
- attributes->allocationFlags = 0;
- }
- return err;
-}
-
-inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
- return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
-}
-
-inline static hipError_t hipEventCreate(hipEvent_t* event) {
- return hipCUDAErrorTohipError(cudaEventCreate(event));
-}
-
-inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
- return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
-}
-
-inline static hipError_t hipEventSynchronize(hipEvent_t event) {
- return hipCUDAErrorTohipError(cudaEventSynchronize(event));
-}
-
-inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
- return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
-}
-
-inline static hipError_t hipEventDestroy(hipEvent_t event) {
- return hipCUDAErrorTohipError(cudaEventDestroy(event));
-}
-
-inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
-}
-
-inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
- return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
-}
-
-inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
- return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
-}
-
-inline static hipError_t hipStreamCreate(hipStream_t* stream) {
- return hipCUDAErrorTohipError(cudaStreamCreate(stream));
-}
-
-inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
- return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
-}
-
-inline static hipError_t hipStreamDestroy(hipStream_t stream) {
- return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
-}
-
-inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
- return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
-}
-
-inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
- return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
-}
-
-inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
- unsigned int flags) {
- return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
-}
-
-inline static hipError_t hipStreamQuery(hipStream_t stream) {
- return hipCUDAErrorTohipError(cudaStreamQuery(stream));
-}
-
-inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
- void* userData, unsigned int flags) {
- return hipCUDAErrorTohipError(
- cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
-}
-
-inline static hipError_t hipDriverGetVersion(int* driverVersion) {
- cudaError_t err = cudaDriverGetVersion(driverVersion);
-
- // Override driver version to match version reported on HCC side.
- *driverVersion = 4;
-
- return hipCUDAErrorTohipError(err);
-}
-
-inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
- return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
-}
-
-inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
- return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
-}
-
-inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
- return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
-}
-
-inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
-}
-
-inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
- return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
-}
-
-inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
- return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
-}
-
-inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
- int* active) {
- return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
-}
-
-inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
- return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
-}
-
-inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
- return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
-}
-
-inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
- return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
-}
-
-inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
- return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
-}
-
-inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
- hipDeviceptr_t dptr) {
- return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
-}
-
-inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
- size_t count) {
- return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
-}
-
-inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
- int srcDevice, size_t count,
- hipStream_t stream __dparm(0)) {
- return hipCUDAErrorTohipError(
- cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
-}
-
-// Profile APIs:
-inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
-
-inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
-
-inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
- return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
-}
-
-inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
- return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
-}
-
-inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
-}
-
-inline static hipError_t hipEventQuery(hipEvent_t event) {
- return hipCUDAErrorTohipError(cudaEventQuery(event));
-}
-
-inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
- return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
-}
-
-inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
- return hipCUResultTohipError(cuCtxDestroy(ctx));
-}
-
-inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
- return hipCUResultTohipError(cuCtxPopCurrent(ctx));
-}
-
-inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
- return hipCUResultTohipError(cuCtxPushCurrent(ctx));
-}
-
-inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
- return hipCUResultTohipError(cuCtxSetCurrent(ctx));
-}
-
-inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
- return hipCUResultTohipError(cuCtxGetCurrent(ctx));
-}
-
-inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
- return hipCUResultTohipError(cuCtxGetDevice(device));
-}
-
-inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
- return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
-}
-
-inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
- return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
-}
-
-inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
- return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
-}
-
-inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
- return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
-}
-
-inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
- return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
-}
-
-inline static hipError_t hipCtxSynchronize(void) {
- return hipCUResultTohipError(cuCtxSynchronize());
-}
-
-inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
- return hipCUResultTohipError(cuCtxGetFlags(flags));
-}
-
-inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
- return hipCUResultTohipError(cuCtxDetach(ctx));
-}
-
-inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
- return hipCUResultTohipError(cuDeviceGet(device, ordinal));
-}
-
-inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
- return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
-}
-
-inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
- return hipCUResultTohipError(cuDeviceGetName(name, len, device));
-}
-
-inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
- int srcDevice, int dstDevice) {
- return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
-}
-
-inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
- return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
-}
-
-inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
- return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
-}
-
-inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
- return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
-}
-
-inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
- return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
-}
-
-inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
- return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
-}
-
-inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
- return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
-}
-
-inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
- return hipCUResultTohipError(cuModuleLoad(module, fname));
-}
-
-inline static hipError_t hipModuleUnload(hipModule_t hmod) {
- return hipCUResultTohipError(cuModuleUnload(hmod));
-}
-
-inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
- const char* kname) {
- return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
-}
-
-inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){
- hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
-}
-
-inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
- return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
-}
-
-inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
- return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
-}
-
-inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
- const char* name) {
- return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
-}
-
-inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
- return hipCUResultTohipError(cuModuleLoadData(module, image));
-}
-
-inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
- unsigned int numOptions, hipJitOption* options,
- void** optionValues) {
- return hipCUResultTohipError(
- cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
-}
-
-inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
- dim3 dimBlocks, void** args, size_t sharedMemBytes,
- hipStream_t stream)
-{
- return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
-}
-
-inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
- unsigned int gridDimY, unsigned int gridDimZ,
- unsigned int blockDimX, unsigned int blockDimY,
- unsigned int blockDimZ, unsigned int sharedMemBytes,
- hipStream_t stream, void** kernelParams,
- void** extra) {
- return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
- blockDimY, blockDimZ, sharedMemBytes, stream,
- kernelParams, extra));
-}
-
-inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
- return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
- struct textureReference* tex,
- const void* devPtr,
- const hipChannelFormatDesc* desc,
- size_t size __dparm(UINT_MAX)) {
- return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
- size_t* offset, struct textureReference* tex, const void* devPtr,
- const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
- return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
-}
-
-inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
- hipChannelFormatKind f) {
- return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
-}
-
-inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
- const hipResourceDesc* pResDesc,
- const hipTextureDesc* pTexDesc,
- const hipResourceViewDesc* pResViewDesc) {
- return hipCUDAErrorTohipError(
- cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
-}
-
-inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
- return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
-}
-
-inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
- const hipResourceDesc* pResDesc) {
- return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
-}
-
-inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
- return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
-}
-
-inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
- hipTextureObject_t textureObject) {
- return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
- size_t* offset, const struct textureReference* texref) {
- return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
-}
-
-inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
-{
- return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
-}
-
-inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
- void** kernelParams, unsigned int sharedMemBytes,
- hipStream_t stream) {
- return hipCUDAErrorTohipError(
- cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
-}
-
-inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
- int numDevices, unsigned int flags) {
- return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#ifdef __CUDACC__
-
-template<class T>
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
- T func,
- int blockSize,
- size_t dynamicSMemSize) {
- return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
- blockSize, dynamicSMemSize));
-}
-
-template <class T>
-inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
- size_t dynamicSMemSize = 0,
- int blockSizeLimit = 0) {
- return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
- dynamicSMemSize, blockSizeLimit));
-}
-
-template <class T>
-inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
- size_t dynamicSMemSize = 0,
- int blockSizeLimit = 0, unsigned int flags = 0) {
- return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
- dynamicSMemSize, blockSizeLimit, flags));
-}
-
-template <class T>
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
- int blockSize, size_t dynamicSMemSize,unsigned int flags) {
- return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
- blockSize, dynamicSMemSize, flags));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
- const void* devPtr, size_t size = UINT_MAX) {
- return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
- const void* devPtr, const hipChannelFormatDesc& desc,
- size_t size = UINT_MAX) {
- return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
- return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
- return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
- struct texture<T, dim, readMode>& tex, hipArray_const_t array,
- const hipChannelFormatDesc& desc) {
- return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
- struct texture<T, dim, readMode>* tex, hipArray_const_t array,
- const hipChannelFormatDesc* desc) {
- return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
- struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
- return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
-}
-
-template <class T>
-inline static hipChannelFormatDesc hipCreateChannelDesc() {
- return cudaCreateChannelDesc<T>();
-}
-
-template <class T>
-inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
- void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
- return hipCUDAErrorTohipError(
- cudaLaunchCooperativeKernel(reinterpret_cast<const void*>(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream));
-}
-
-inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){
- return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
-}
-
-inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
- return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
-}
-
-inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){
- return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
-}
-
-inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t Pitch){
- return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
-}
-
-inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){
- return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
-}
-
-inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){
- return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
-}
-
-inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){
- return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
-}
-
-inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){
- return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
-}
-
-inline static hipError_t hipArrayDestroy(hiparray hArray){
- return hipCUResultTohipError(cuArrayDestroy(hArray));
-}
-
-#endif //__CUDACC__
-
-#endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_texture_types.h b/third_party/rocm/include/hip/nvcc_detail/hip_texture_types.h
deleted file mode 100644
index 751dd8e..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_texture_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H
-
-#include <texture_types.h>
-
-#endif
diff --git a/third_party/rocm/include/hip/texture_types.h b/third_party/rocm/include/hip/texture_types.h
deleted file mode 100644
index 7d78570..0000000
--- a/third_party/rocm/include/hip/texture_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/texture_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "texture_types.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hsa/Brig.h b/third_party/rocm/include/hsa/Brig.h
deleted file mode 100644
index 4f34bd1..0000000
--- a/third_party/rocm/include/hsa/Brig.h
+++ /dev/null
@@ -1,1131 +0,0 @@
-// University of Illinois/NCSA
-// Open Source License
-//
-// Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
-// All rights reserved.
-//
-// Developed by:
-//
-// HSA Team
-//
-// Advanced Micro Devices, Inc
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal with
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-//
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimers in the
-// documentation and/or other materials provided with the distribution.
-//
-// * Neither the names of the LLVM Team, University of Illinois at
-// Urbana-Champaign, nor the names of its contributors may be used to
-// endorse or promote products derived from this Software without specific
-// prior written permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-// SOFTWARE.
-
-#ifndef INCLUDED_BRIG_H
-#define INCLUDED_BRIG_H
-
-#include <stddef.h> /* size_t */
-#include <stdint.h> /* uintXX_t */
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*========================================================================================*/
-/* =======================================================================================*/
-/* =======================================================================================*/
-/* =======================================================================================*/
-
-typedef uint32_t BrigCodeOffset32_t;
-typedef uint32_t BrigOperandOffset32_t;
-typedef uint32_t BrigDataOffset32_t;
-
-typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
-typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
-typedef BrigDataOffset32_t BrigDataOffsetString32_t;
-
-typedef uint32_t BrigVersion32_t;
-enum BrigVersion {
- BRIG_VERSION_HSAIL_MAJOR = 1,
- BRIG_VERSION_HSAIL_MINOR = 0,
- BRIG_VERSION_BRIG_MAJOR = 1,
- BRIG_VERSION_BRIG_MINOR = 0
-};
-
-typedef uint16_t BrigKind16_t;
-enum BrigKind {
- BRIG_KIND_NONE = 0x0000,
-
- BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
- BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
- BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
- BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
- BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
- BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
- BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
- BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
- BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
- BRIG_KIND_DIRECTIVE_LOC = 0x100a,
- BRIG_KIND_DIRECTIVE_MODULE = 0x100b,
- BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
- BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,
- BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
- BRIG_KIND_DIRECTIVE_END = 0x100f,
-
- BRIG_KIND_INST_BEGIN = 0x2000,
- BRIG_KIND_INST_ADDR = 0x2000,
- BRIG_KIND_INST_ATOMIC = 0x2001,
- BRIG_KIND_INST_BASIC = 0x2002,
- BRIG_KIND_INST_BR = 0x2003,
- BRIG_KIND_INST_CMP = 0x2004,
- BRIG_KIND_INST_CVT = 0x2005,
- BRIG_KIND_INST_IMAGE = 0x2006,
- BRIG_KIND_INST_LANE = 0x2007,
- BRIG_KIND_INST_MEM = 0x2008,
- BRIG_KIND_INST_MEM_FENCE = 0x2009,
- BRIG_KIND_INST_MOD = 0x200a,
- BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
- BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
- BRIG_KIND_INST_QUEUE = 0x200d,
- BRIG_KIND_INST_SEG = 0x200e,
- BRIG_KIND_INST_SEG_CVT = 0x200f,
- BRIG_KIND_INST_SIGNAL = 0x2010,
- BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
- BRIG_KIND_INST_END = 0x2012,
-
- BRIG_KIND_OPERAND_BEGIN = 0x3000,
- BRIG_KIND_OPERAND_ADDRESS = 0x3000,
- BRIG_KIND_OPERAND_ALIGN = 0x3001,
- BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
- BRIG_KIND_OPERAND_CODE_REF = 0x3003,
- BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
- BRIG_KIND_OPERAND_RESERVED = 0x3005,
- BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
- BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
- BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
- BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
- BRIG_KIND_OPERAND_REGISTER = 0x300a,
- BRIG_KIND_OPERAND_STRING = 0x300b,
- BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
- BRIG_KIND_OPERAND_END = 0x300d
-};
-
-typedef uint8_t BrigAlignment8_t;
-enum BrigAlignment {
- BRIG_ALIGNMENT_NONE = 0,
- BRIG_ALIGNMENT_1 = 1,
- BRIG_ALIGNMENT_2 = 2,
- BRIG_ALIGNMENT_4 = 3,
- BRIG_ALIGNMENT_8 = 4,
- BRIG_ALIGNMENT_16 = 5,
- BRIG_ALIGNMENT_32 = 6,
- BRIG_ALIGNMENT_64 = 7,
- BRIG_ALIGNMENT_128 = 8,
- BRIG_ALIGNMENT_256 = 9,
- BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_256
-};
-
-typedef uint8_t BrigAllocation8_t;
-enum BrigAllocation {
- BRIG_ALLOCATION_NONE = 0,
- BRIG_ALLOCATION_PROGRAM = 1,
- BRIG_ALLOCATION_AGENT = 2,
- BRIG_ALLOCATION_AUTOMATIC = 3
-};
-
-typedef uint8_t BrigAluModifier8_t;
-enum BrigAluModifierMask {
- BRIG_ALU_FTZ = 1
-};
-
-typedef uint8_t BrigAtomicOperation8_t;
-enum BrigAtomicOperation {
- BRIG_ATOMIC_ADD = 0,
- BRIG_ATOMIC_AND = 1,
- BRIG_ATOMIC_CAS = 2,
- BRIG_ATOMIC_EXCH = 3,
- BRIG_ATOMIC_LD = 4,
- BRIG_ATOMIC_MAX = 5,
- BRIG_ATOMIC_MIN = 6,
- BRIG_ATOMIC_OR = 7,
- BRIG_ATOMIC_ST = 8,
- BRIG_ATOMIC_SUB = 9,
- BRIG_ATOMIC_WRAPDEC = 10,
- BRIG_ATOMIC_WRAPINC = 11,
- BRIG_ATOMIC_XOR = 12,
- BRIG_ATOMIC_WAIT_EQ = 13,
- BRIG_ATOMIC_WAIT_NE = 14,
- BRIG_ATOMIC_WAIT_LT = 15,
- BRIG_ATOMIC_WAIT_GTE = 16,
- BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
- BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
- BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
- BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
-};
-
-typedef uint8_t BrigCompareOperation8_t;
-enum BrigCompareOperation {
- BRIG_COMPARE_EQ = 0,
- BRIG_COMPARE_NE = 1,
- BRIG_COMPARE_LT = 2,
- BRIG_COMPARE_LE = 3,
- BRIG_COMPARE_GT = 4,
- BRIG_COMPARE_GE = 5,
- BRIG_COMPARE_EQU = 6,
- BRIG_COMPARE_NEU = 7,
- BRIG_COMPARE_LTU = 8,
- BRIG_COMPARE_LEU = 9,
- BRIG_COMPARE_GTU = 10,
- BRIG_COMPARE_GEU = 11,
- BRIG_COMPARE_NUM = 12,
- BRIG_COMPARE_NAN = 13,
- BRIG_COMPARE_SEQ = 14,
- BRIG_COMPARE_SNE = 15,
- BRIG_COMPARE_SLT = 16,
- BRIG_COMPARE_SLE = 17,
- BRIG_COMPARE_SGT = 18,
- BRIG_COMPARE_SGE = 19,
- BRIG_COMPARE_SGEU = 20,
- BRIG_COMPARE_SEQU = 21,
- BRIG_COMPARE_SNEU = 22,
- BRIG_COMPARE_SLTU = 23,
- BRIG_COMPARE_SLEU = 24,
- BRIG_COMPARE_SNUM = 25,
- BRIG_COMPARE_SNAN = 26,
- BRIG_COMPARE_SGTU = 27
-};
-
-typedef uint16_t BrigControlDirective16_t;
-enum BrigControlDirective {
- BRIG_CONTROL_NONE = 0,
- BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
- BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
- BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
- BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
- BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
- BRIG_CONTROL_REQUIREDDIM = 6,
- BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
- BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
- BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
-};
-
-typedef uint8_t BrigExecutableModifier8_t;
-enum BrigExecutableModifierMask {
- BRIG_EXECUTABLE_DEFINITION = 1
-};
-
-typedef uint8_t BrigImageChannelOrder8_t;
-enum BrigImageChannelOrder {
- BRIG_CHANNEL_ORDER_A = 0,
- BRIG_CHANNEL_ORDER_R = 1,
- BRIG_CHANNEL_ORDER_RX = 2,
- BRIG_CHANNEL_ORDER_RG = 3,
- BRIG_CHANNEL_ORDER_RGX = 4,
- BRIG_CHANNEL_ORDER_RA = 5,
- BRIG_CHANNEL_ORDER_RGB = 6,
- BRIG_CHANNEL_ORDER_RGBX = 7,
- BRIG_CHANNEL_ORDER_RGBA = 8,
- BRIG_CHANNEL_ORDER_BGRA = 9,
- BRIG_CHANNEL_ORDER_ARGB = 10,
- BRIG_CHANNEL_ORDER_ABGR = 11,
- BRIG_CHANNEL_ORDER_SRGB = 12,
- BRIG_CHANNEL_ORDER_SRGBX = 13,
- BRIG_CHANNEL_ORDER_SRGBA = 14,
- BRIG_CHANNEL_ORDER_SBGRA = 15,
- BRIG_CHANNEL_ORDER_INTENSITY = 16,
- BRIG_CHANNEL_ORDER_LUMINANCE = 17,
- BRIG_CHANNEL_ORDER_DEPTH = 18,
- BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
-
- BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageChannelType8_t;
-enum BrigImageChannelType {
- BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
- BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
- BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
- BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
- BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
- BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
- BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
- BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
- BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
- BRIG_CHANNEL_TYPE_FLOAT = 15,
-
- BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageGeometry8_t;
-enum BrigImageGeometry {
- BRIG_GEOMETRY_1D = 0,
- BRIG_GEOMETRY_2D = 1,
- BRIG_GEOMETRY_3D = 2,
- BRIG_GEOMETRY_1DA = 3,
- BRIG_GEOMETRY_2DA = 4,
- BRIG_GEOMETRY_1DB = 5,
- BRIG_GEOMETRY_2DDEPTH = 6,
- BRIG_GEOMETRY_2DADEPTH = 7,
-
- BRIG_GEOMETRY_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageQuery8_t;
-enum BrigImageQuery {
- BRIG_IMAGE_QUERY_WIDTH = 0,
- BRIG_IMAGE_QUERY_HEIGHT = 1,
- BRIG_IMAGE_QUERY_DEPTH = 2,
- BRIG_IMAGE_QUERY_ARRAY = 3,
- BRIG_IMAGE_QUERY_CHANNELORDER = 4,
- BRIG_IMAGE_QUERY_CHANNELTYPE = 5,
-
- BRIG_IMAGE_QUERY_FIRST_USER_DEFINED = 6
-};
-
-typedef uint8_t BrigLinkage8_t;
-enum BrigLinkage {
- BRIG_LINKAGE_NONE = 0,
- BRIG_LINKAGE_PROGRAM = 1,
- BRIG_LINKAGE_MODULE = 2,
- BRIG_LINKAGE_FUNCTION = 3,
- BRIG_LINKAGE_ARG = 4
-};
-
-typedef uint8_t BrigMachineModel8_t;
-enum BrigMachineModel {
- BRIG_MACHINE_SMALL = 0,
- BRIG_MACHINE_LARGE = 1,
-};
-
-typedef uint8_t BrigMemoryModifier8_t;
-enum BrigMemoryModifierMask {
- BRIG_MEMORY_CONST = 1
-};
-
-typedef uint8_t BrigMemoryOrder8_t;
-enum BrigMemoryOrder {
- BRIG_MEMORY_ORDER_NONE = 0,
- BRIG_MEMORY_ORDER_RELAXED = 1,
- BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
- BRIG_MEMORY_ORDER_SC_RELEASE = 3,
- BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4,
-};
-
-typedef uint8_t BrigMemoryScope8_t;
-enum BrigMemoryScope {
- BRIG_MEMORY_SCOPE_NONE = 0,
- BRIG_MEMORY_SCOPE_WORKITEM = 1,
- BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
- BRIG_MEMORY_SCOPE_WORKGROUP = 3,
- BRIG_MEMORY_SCOPE_AGENT = 4,
- BRIG_MEMORY_SCOPE_SYSTEM = 5,
-};
-
-typedef uint16_t BrigOpcode16_t;
-enum BrigOpcode {
- BRIG_OPCODE_NOP = 0,
- BRIG_OPCODE_ABS = 1,
- BRIG_OPCODE_ADD = 2,
- BRIG_OPCODE_BORROW = 3,
- BRIG_OPCODE_CARRY = 4,
- BRIG_OPCODE_CEIL = 5,
- BRIG_OPCODE_COPYSIGN = 6,
- BRIG_OPCODE_DIV = 7,
- BRIG_OPCODE_FLOOR = 8,
- BRIG_OPCODE_FMA = 9,
- BRIG_OPCODE_FRACT = 10,
- BRIG_OPCODE_MAD = 11,
- BRIG_OPCODE_MAX = 12,
- BRIG_OPCODE_MIN = 13,
- BRIG_OPCODE_MUL = 14,
- BRIG_OPCODE_MULHI = 15,
- BRIG_OPCODE_NEG = 16,
- BRIG_OPCODE_REM = 17,
- BRIG_OPCODE_RINT = 18,
- BRIG_OPCODE_SQRT = 19,
- BRIG_OPCODE_SUB = 20,
- BRIG_OPCODE_TRUNC = 21,
- BRIG_OPCODE_MAD24 = 22,
- BRIG_OPCODE_MAD24HI = 23,
- BRIG_OPCODE_MUL24 = 24,
- BRIG_OPCODE_MUL24HI = 25,
- BRIG_OPCODE_SHL = 26,
- BRIG_OPCODE_SHR = 27,
- BRIG_OPCODE_AND = 28,
- BRIG_OPCODE_NOT = 29,
- BRIG_OPCODE_OR = 30,
- BRIG_OPCODE_POPCOUNT = 31,
- BRIG_OPCODE_XOR = 32,
- BRIG_OPCODE_BITEXTRACT = 33,
- BRIG_OPCODE_BITINSERT = 34,
- BRIG_OPCODE_BITMASK = 35,
- BRIG_OPCODE_BITREV = 36,
- BRIG_OPCODE_BITSELECT = 37,
- BRIG_OPCODE_FIRSTBIT = 38,
- BRIG_OPCODE_LASTBIT = 39,
- BRIG_OPCODE_COMBINE = 40,
- BRIG_OPCODE_EXPAND = 41,
- BRIG_OPCODE_LDA = 42,
- BRIG_OPCODE_MOV = 43,
- BRIG_OPCODE_SHUFFLE = 44,
- BRIG_OPCODE_UNPACKHI = 45,
- BRIG_OPCODE_UNPACKLO = 46,
- BRIG_OPCODE_PACK = 47,
- BRIG_OPCODE_UNPACK = 48,
- BRIG_OPCODE_CMOV = 49,
- BRIG_OPCODE_CLASS = 50,
- BRIG_OPCODE_NCOS = 51,
- BRIG_OPCODE_NEXP2 = 52,
- BRIG_OPCODE_NFMA = 53,
- BRIG_OPCODE_NLOG2 = 54,
- BRIG_OPCODE_NRCP = 55,
- BRIG_OPCODE_NRSQRT = 56,
- BRIG_OPCODE_NSIN = 57,
- BRIG_OPCODE_NSQRT = 58,
- BRIG_OPCODE_BITALIGN = 59,
- BRIG_OPCODE_BYTEALIGN = 60,
- BRIG_OPCODE_PACKCVT = 61,
- BRIG_OPCODE_UNPACKCVT = 62,
- BRIG_OPCODE_LERP = 63,
- BRIG_OPCODE_SAD = 64,
- BRIG_OPCODE_SADHI = 65,
- BRIG_OPCODE_SEGMENTP = 66,
- BRIG_OPCODE_FTOS = 67,
- BRIG_OPCODE_STOF = 68,
- BRIG_OPCODE_CMP = 69,
- BRIG_OPCODE_CVT = 70,
- BRIG_OPCODE_LD = 71,
- BRIG_OPCODE_ST = 72,
- BRIG_OPCODE_ATOMIC = 73,
- BRIG_OPCODE_ATOMICNORET = 74,
- BRIG_OPCODE_SIGNAL = 75,
- BRIG_OPCODE_SIGNALNORET = 76,
- BRIG_OPCODE_MEMFENCE = 77,
- BRIG_OPCODE_RDIMAGE = 78,
- BRIG_OPCODE_LDIMAGE = 79,
- BRIG_OPCODE_STIMAGE = 80,
- BRIG_OPCODE_IMAGEFENCE = 81,
- BRIG_OPCODE_QUERYIMAGE = 82,
- BRIG_OPCODE_QUERYSAMPLER = 83,
- BRIG_OPCODE_CBR = 84,
- BRIG_OPCODE_BR = 85,
- BRIG_OPCODE_SBR = 86,
- BRIG_OPCODE_BARRIER = 87,
- BRIG_OPCODE_WAVEBARRIER = 88,
- BRIG_OPCODE_ARRIVEFBAR = 89,
- BRIG_OPCODE_INITFBAR = 90,
- BRIG_OPCODE_JOINFBAR = 91,
- BRIG_OPCODE_LEAVEFBAR = 92,
- BRIG_OPCODE_RELEASEFBAR = 93,
- BRIG_OPCODE_WAITFBAR = 94,
- BRIG_OPCODE_LDF = 95,
- BRIG_OPCODE_ACTIVELANECOUNT = 96,
- BRIG_OPCODE_ACTIVELANEID = 97,
- BRIG_OPCODE_ACTIVELANEMASK = 98,
- BRIG_OPCODE_ACTIVELANEPERMUTE = 99,
- BRIG_OPCODE_CALL = 100,
- BRIG_OPCODE_SCALL = 101,
- BRIG_OPCODE_ICALL = 102,
- BRIG_OPCODE_RET = 103,
- BRIG_OPCODE_ALLOCA = 104,
- BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
- BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
- BRIG_OPCODE_DIM = 107,
- BRIG_OPCODE_GRIDGROUPS = 108,
- BRIG_OPCODE_GRIDSIZE = 109,
- BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
- BRIG_OPCODE_PACKETID = 111,
- BRIG_OPCODE_WORKGROUPID = 112,
- BRIG_OPCODE_WORKGROUPSIZE = 113,
- BRIG_OPCODE_WORKITEMABSID = 114,
- BRIG_OPCODE_WORKITEMFLATABSID = 115,
- BRIG_OPCODE_WORKITEMFLATID = 116,
- BRIG_OPCODE_WORKITEMID = 117,
- BRIG_OPCODE_CLEARDETECTEXCEPT = 118,
- BRIG_OPCODE_GETDETECTEXCEPT = 119,
- BRIG_OPCODE_SETDETECTEXCEPT = 120,
- BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,
- BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,
- BRIG_OPCODE_LDQUEUEREADINDEX = 123,
- BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,
- BRIG_OPCODE_STQUEUEREADINDEX = 125,
- BRIG_OPCODE_STQUEUEWRITEINDEX = 126,
- BRIG_OPCODE_CLOCK = 127,
- BRIG_OPCODE_CUID = 128,
- BRIG_OPCODE_DEBUGTRAP = 129,
- BRIG_OPCODE_GROUPBASEPTR = 130,
- BRIG_OPCODE_KERNARGBASEPTR = 131,
- BRIG_OPCODE_LANEID = 132,
- BRIG_OPCODE_MAXCUID = 133,
- BRIG_OPCODE_MAXWAVEID = 134,
- BRIG_OPCODE_NULLPTR = 135,
- BRIG_OPCODE_WAVEID = 136,
-
- BRIG_OPCODE_FIRST_USER_DEFINED = 32768,
-};
-
-typedef uint8_t BrigPack8_t;
-enum BrigPack {
- BRIG_PACK_NONE = 0,
- BRIG_PACK_PP = 1,
- BRIG_PACK_PS = 2,
- BRIG_PACK_SP = 3,
- BRIG_PACK_SS = 4,
- BRIG_PACK_S = 5,
- BRIG_PACK_P = 6,
- BRIG_PACK_PPSAT = 7,
- BRIG_PACK_PSSAT = 8,
- BRIG_PACK_SPSAT = 9,
- BRIG_PACK_SSSAT = 10,
- BRIG_PACK_SSAT = 11,
- BRIG_PACK_PSAT = 12
-};
-
-typedef uint8_t BrigProfile8_t;
-enum BrigProfile {
- BRIG_PROFILE_BASE = 0,
- BRIG_PROFILE_FULL = 1,
-};
-
-typedef uint16_t BrigRegisterKind16_t;
-enum BrigRegisterKind {
- BRIG_REGISTER_KIND_CONTROL = 0,
- BRIG_REGISTER_KIND_SINGLE = 1,
- BRIG_REGISTER_KIND_DOUBLE = 2,
- BRIG_REGISTER_KIND_QUAD = 3
-};
-
-typedef uint8_t BrigRound8_t;
-enum BrigRound {
- BRIG_ROUND_NONE = 0,
- BRIG_ROUND_FLOAT_DEFAULT = 1,
- BRIG_ROUND_FLOAT_NEAR_EVEN = 2,
- BRIG_ROUND_FLOAT_ZERO = 3,
- BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,
- BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,
- BRIG_ROUND_INTEGER_NEAR_EVEN = 6,
- BRIG_ROUND_INTEGER_ZERO = 7,
- BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,
- BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,
- BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,
- BRIG_ROUND_INTEGER_ZERO_SAT = 11,
- BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,
- BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,
- BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,
- BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,
- BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,
- BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,
- BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,
- BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,
- BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,
- BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21
-};
-
-typedef uint8_t BrigSamplerAddressing8_t;
-enum BrigSamplerAddressing {
- BRIG_ADDRESSING_UNDEFINED = 0,
- BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
- BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
- BRIG_ADDRESSING_REPEAT = 3,
- BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
-
- BRIG_ADDRESSING_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigSamplerCoordNormalization8_t;
-enum BrigSamplerCoordNormalization {
- BRIG_COORD_UNNORMALIZED = 0,
- BRIG_COORD_NORMALIZED = 1
-};
-
-typedef uint8_t BrigSamplerFilter8_t;
-enum BrigSamplerFilter {
- BRIG_FILTER_NEAREST = 0,
- BRIG_FILTER_LINEAR = 1,
-
- BRIG_FILTER_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigSamplerQuery8_t;
-enum BrigSamplerQuery {
- BRIG_SAMPLER_QUERY_ADDRESSING = 0,
- BRIG_SAMPLER_QUERY_COORD = 1,
- BRIG_SAMPLER_QUERY_FILTER = 2
-};
-
-typedef uint32_t BrigSectionIndex32_t;
-enum BrigSectionIndex {
- BRIG_SECTION_INDEX_DATA = 0,
- BRIG_SECTION_INDEX_CODE = 1,
- BRIG_SECTION_INDEX_OPERAND = 2,
-
- BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
-};
-
-typedef uint8_t BrigSegCvtModifier8_t;
-enum BrigSegCvtModifierMask {
- BRIG_SEG_CVT_NONULL = 1
-};
-
-typedef uint8_t BrigSegment8_t;
-enum BrigSegment {
- BRIG_SEGMENT_NONE = 0,
- BRIG_SEGMENT_FLAT = 1,
- BRIG_SEGMENT_GLOBAL = 2,
- BRIG_SEGMENT_READONLY = 3,
- BRIG_SEGMENT_KERNARG = 4,
- BRIG_SEGMENT_GROUP = 5,
- BRIG_SEGMENT_PRIVATE = 6,
- BRIG_SEGMENT_SPILL = 7,
- BRIG_SEGMENT_ARG = 8,
-
- BRIG_SEGMENT_FIRST_USER_DEFINED = 128
-};
-
-enum {
- BRIG_TYPE_BASE_SIZE = 5,
- BRIG_TYPE_PACK_SIZE = 2,
- BRIG_TYPE_ARRAY_SIZE = 1,
-
- BRIG_TYPE_BASE_SHIFT = 0,
- BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
- BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
-
- BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT,
- BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT,
-
- BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT,
-
- BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT
-};
-
-typedef uint16_t BrigType16_t;
-enum BrigType {
- BRIG_TYPE_NONE = 0,
- BRIG_TYPE_U8 = 1,
- BRIG_TYPE_U16 = 2,
- BRIG_TYPE_U32 = 3,
- BRIG_TYPE_U64 = 4,
- BRIG_TYPE_S8 = 5,
- BRIG_TYPE_S16 = 6,
- BRIG_TYPE_S32 = 7,
- BRIG_TYPE_S64 = 8,
- BRIG_TYPE_F16 = 9,
- BRIG_TYPE_F32 = 10,
- BRIG_TYPE_F64 = 11,
- BRIG_TYPE_B1 = 12,
- BRIG_TYPE_B8 = 13,
- BRIG_TYPE_B16 = 14,
- BRIG_TYPE_B32 = 15,
- BRIG_TYPE_B64 = 16,
- BRIG_TYPE_B128 = 17,
- BRIG_TYPE_SAMP = 18,
- BRIG_TYPE_ROIMG = 19,
- BRIG_TYPE_WOIMG = 20,
- BRIG_TYPE_RWIMG = 21,
- BRIG_TYPE_SIG32 = 22,
- BRIG_TYPE_SIG64 = 23,
-
- BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
- BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY,
- BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,
- BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,
- BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,
- BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY,
-};
-
-typedef uint8_t BrigVariableModifier8_t;
-enum BrigVariableModifierMask {
- BRIG_VARIABLE_DEFINITION = 1,
- BRIG_VARIABLE_CONST = 2
-};
-
-typedef uint8_t BrigWidth8_t;
-enum BrigWidth {
- BRIG_WIDTH_NONE = 0,
- BRIG_WIDTH_1 = 1,
- BRIG_WIDTH_2 = 2,
- BRIG_WIDTH_4 = 3,
- BRIG_WIDTH_8 = 4,
- BRIG_WIDTH_16 = 5,
- BRIG_WIDTH_32 = 6,
- BRIG_WIDTH_64 = 7,
- BRIG_WIDTH_128 = 8,
- BRIG_WIDTH_256 = 9,
- BRIG_WIDTH_512 = 10,
- BRIG_WIDTH_1024 = 11,
- BRIG_WIDTH_2048 = 12,
- BRIG_WIDTH_4096 = 13,
- BRIG_WIDTH_8192 = 14,
- BRIG_WIDTH_16384 = 15,
- BRIG_WIDTH_32768 = 16,
- BRIG_WIDTH_65536 = 17,
- BRIG_WIDTH_131072 = 18,
- BRIG_WIDTH_262144 = 19,
- BRIG_WIDTH_524288 = 20,
- BRIG_WIDTH_1048576 = 21,
- BRIG_WIDTH_2097152 = 22,
- BRIG_WIDTH_4194304 = 23,
- BRIG_WIDTH_8388608 = 24,
- BRIG_WIDTH_16777216 = 25,
- BRIG_WIDTH_33554432 = 26,
- BRIG_WIDTH_67108864 = 27,
- BRIG_WIDTH_134217728 = 28,
- BRIG_WIDTH_268435456 = 29,
- BRIG_WIDTH_536870912 = 30,
- BRIG_WIDTH_1073741824 = 31,
- BRIG_WIDTH_2147483648 = 32,
- BRIG_WIDTH_WAVESIZE = 33,
- BRIG_WIDTH_ALL = 34,
-};
-
-struct BrigUInt64 {
- uint32_t lo;
- uint32_t hi;
-};
-
-struct BrigBase {
- uint16_t byteCount;
- BrigKind16_t kind;
-};
-
-struct BrigData {
- uint32_t byteCount;
- uint8_t bytes[1];
-};
-
-struct BrigDirectiveArgBlock {
- BrigBase base;
-};
-
-struct BrigDirectiveComment {
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveControl {
- BrigBase base;
- BrigControlDirective16_t control;
- uint16_t reserved;
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveExecutable {
- BrigBase base;
- BrigDataOffsetString32_t name;
- uint16_t outArgCount;
- uint16_t inArgCount;
- BrigCodeOffset32_t firstInArg;
- BrigCodeOffset32_t firstCodeBlockEntry;
- BrigCodeOffset32_t nextModuleEntry;
- BrigExecutableModifier8_t modifier;
- BrigLinkage8_t linkage;
- uint16_t reserved;
-};
-
-struct BrigDirectiveExtension {
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveFbarrier {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigVariableModifier8_t modifier;
- BrigLinkage8_t linkage;
- uint16_t reserved;
-};
-
-struct BrigDirectiveLabel {
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveLoc {
- BrigBase base;
- BrigDataOffsetString32_t filename;
- uint32_t line;
- uint32_t column;
-};
-
-struct BrigDirectiveNone {
- BrigBase base;
-};
-
-struct BrigDirectivePragma {
- BrigBase base;
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveVariable {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigOperandOffset32_t init;
- BrigType16_t type;
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- BrigUInt64 dim;
- BrigVariableModifier8_t modifier;
- BrigLinkage8_t linkage;
- BrigAllocation8_t allocation;
- uint8_t reserved;
-};
-
-struct BrigDirectiveModule {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigVersion32_t hsailMajor;
- BrigVersion32_t hsailMinor;
- BrigProfile8_t profile;
- BrigMachineModel8_t machineModel;
- BrigRound8_t defaultFloatRound;
- uint8_t reserved;
-};
-
-struct BrigInstBase {
- BrigBase base;
- BrigOpcode16_t opcode;
- BrigType16_t type;
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigInstAddr {
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3];
-};
-
-struct BrigInstAtomic {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t memoryScope;
- BrigAtomicOperation8_t atomicOperation;
- uint8_t equivClass;
- uint8_t reserved[3];
-};
-
-struct BrigInstBasic {
- BrigInstBase base;
-};
-
-struct BrigInstBr {
- BrigInstBase base;
- BrigWidth8_t width;
- uint8_t reserved[3];
-};
-
-struct BrigInstCmp {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier8_t modifier;
- BrigCompareOperation8_t compare;
- BrigPack8_t pack;
- uint8_t reserved[3];
-};
-
-struct BrigInstCvt {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier8_t modifier;
- BrigRound8_t round;
-};
-
-struct BrigInstImage {
- BrigInstBase base;
- BrigType16_t imageType;
- BrigType16_t coordType;
- BrigImageGeometry8_t geometry;
- uint8_t equivClass;
- uint16_t reserved;
-};
-
-struct BrigInstLane {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigWidth8_t width;
- uint8_t reserved;
-};
-
-struct BrigInstMem {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- uint8_t equivClass;
- BrigWidth8_t width;
- BrigMemoryModifier8_t modifier;
- uint8_t reserved[3];
-};
-
-struct BrigInstMemFence {
- BrigInstBase base;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t globalSegmentMemoryScope;
- BrigMemoryScope8_t groupSegmentMemoryScope;
- BrigMemoryScope8_t imageSegmentMemoryScope;
-};
-
-struct BrigInstMod {
- BrigInstBase base;
- BrigAluModifier8_t modifier;
- BrigRound8_t round;
- BrigPack8_t pack;
- uint8_t reserved;
-};
-
-struct BrigInstQueryImage {
- BrigInstBase base;
- BrigType16_t imageType;
- BrigImageGeometry8_t geometry;
- BrigImageQuery8_t query;
-};
-
-struct BrigInstQuerySampler {
- BrigInstBase base;
- BrigSamplerQuery8_t query;
- uint8_t reserved[3];
-};
-
-struct BrigInstQueue {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- uint16_t reserved;
-};
-
-struct BrigInstSeg {
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3];
-};
-
-struct BrigInstSegCvt {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigSegment8_t segment;
- BrigSegCvtModifier8_t modifier;
-};
-
-struct BrigInstSignal {
- BrigInstBase base;
- BrigType16_t signalType;
- BrigMemoryOrder8_t memoryOrder;
- BrigAtomicOperation8_t signalOperation;
-};
-
-struct BrigInstSourceType {
- BrigInstBase base;
- BrigType16_t sourceType;
- uint16_t reserved;
-};
-
-struct BrigOperandAddress {
- BrigBase base;
- BrigCodeOffset32_t symbol;
- BrigOperandOffset32_t reg;
- BrigUInt64 offset;
-};
-
-struct BrigOperandAlign {
- BrigBase base;
- BrigAlignment8_t align;
- uint8_t reserved[3];
-};
-
-struct BrigOperandCodeList {
- BrigBase base;
- BrigDataOffsetCodeList32_t elements;
-};
-
-struct BrigOperandCodeRef {
- BrigBase base;
- BrigCodeOffset32_t ref;
-};
-
-struct BrigOperandConstantBytes {
- BrigBase base;
- BrigType16_t type;
- uint16_t reserved;
- BrigDataOffsetString32_t bytes;
-};
-
-struct BrigOperandConstantOperandList {
- BrigBase base;
- BrigType16_t type;
- uint16_t reserved;
- BrigDataOffsetOperandList32_t elements;
-};
-
-struct BrigOperandConstantImage {
- BrigBase base;
- BrigType16_t type;
- BrigImageGeometry8_t geometry;
- BrigImageChannelOrder8_t channelOrder;
- BrigImageChannelType8_t channelType;
- uint8_t reserved[3];
- BrigUInt64 width;
- BrigUInt64 height;
- BrigUInt64 depth;
- BrigUInt64 array;
-};
-
-struct BrigOperandOperandList {
- BrigBase base;
- BrigDataOffsetOperandList32_t elements;
-};
-
-struct BrigOperandRegister {
- BrigBase base;
- BrigRegisterKind16_t regKind;
- uint16_t regNum;
-};
-
-struct BrigOperandConstantSampler {
- BrigBase base;
- BrigType16_t type;
- BrigSamplerCoordNormalization8_t coord;
- BrigSamplerFilter8_t filter;
- BrigSamplerAddressing8_t addressing;
- uint8_t reserved[3];
-};
-
-struct BrigOperandString {
- BrigBase base;
- BrigDataOffsetString32_t string;
-};
-
-struct BrigOperandWavesize {
- BrigBase base;
-};
-
-typedef uint32_t BrigExceptions32_t;
-enum BrigExceptionsMask {
- BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
- BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
- BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
- BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
- BRIG_EXCEPTIONS_INEXACT = 1 << 4,
-
- BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
-};
-
-struct BrigSectionHeader {
- uint64_t byteCount;
- uint32_t headerByteCount;
- uint32_t nameLength;
- uint8_t name[1];
-};
-
-struct BrigModuleHeader {
- char identification[8];
- BrigVersion32_t brigMajor;
- BrigVersion32_t brigMinor;
- uint64_t byteCount;
- uint8_t hash[64];
- uint32_t reserved;
- uint32_t sectionCount;
- uint64_t sectionIndex;
-};
-
-typedef BrigModuleHeader* BrigModule_t;
-
-#ifdef __cplusplus
-}
-#endif /*__cplusplus*/
-
-#endif // defined(INCLUDED_BRIG_H)
diff --git a/third_party/rocm/include/hsa/amd_hsa_common.h b/third_party/rocm/include/hsa/amd_hsa_common.h
deleted file mode 100644
index 7c4ed3e..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_common.h
+++ /dev/null
@@ -1,91 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// The following set of header files provides definitions for AMD GPU
-// Architecture:
-// - amd_hsa_common.h
-// - amd_hsa_elf.h
-// - amd_hsa_kernel_code.h
-// - amd_hsa_queue.h
-// - amd_hsa_signal.h
-//
-// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
-// information.
-
-#ifndef AMD_HSA_COMMON_H
-#define AMD_HSA_COMMON_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-// Descriptive version of the HSA Application Binary Interface.
-#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
-
-// Alignment attribute that specifies a minimum alignment (in bytes) for
-// variables of the specified type.
-#if defined(__GNUC__)
-# define __ALIGNED__(x) __attribute__((aligned(x)))
-#elif defined(_MSC_VER)
-# define __ALIGNED__(x) __declspec(align(x))
-#elif defined(RC_INVOKED)
-# define __ALIGNED__(x)
-#else
-# error
-#endif
-
-// Creates enumeration entries for packed types. Enumeration entries include
-// bit shift amount, bit width, and bit mask.
-#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \
- name##_SHIFT = (shift), \
- name##_WIDTH = (width), \
- name = (((1 << (width)) - 1) << (shift)) \
-
-// Gets bits for specified mask from specified src packed instance.
-#define AMD_HSA_BITS_GET(src, mask) \
- ((src & mask) >> mask ## _SHIFT) \
-
-// Sets val bits for specified mask in specified dst packed instance.
-#define AMD_HSA_BITS_SET(dst, mask, val) \
- dst &= (~(1 << mask##_SHIFT) & ~mask); \
- dst |= (((val) << mask##_SHIFT) & mask) \
-
-#endif // AMD_HSA_COMMON_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_elf.h b/third_party/rocm/include/hsa/amd_hsa_elf.h
deleted file mode 100644
index adcdec4..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_elf.h
+++ /dev/null
@@ -1,416 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// Undefine the macro in case it is defined in the system elf.h.
-#undef EM_AMDGPU
-
-#ifndef AMD_HSA_ELF_H
-#define AMD_HSA_ELF_H
-
-// AMD GPU Specific ELF Header Enumeration Values.
-//
-// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
-// code object V1 defintions which are not part of the LLVM header. Code object
-// V1 was only supported by the Finalizer which is now deprecated and removed.
-//
-// TODO: Deprecate and remove V1 support and replace this header with using the
-// LLVM header.
-namespace ELF {
-
-// Machine architectures
-// See current registered ELF machine architectures at:
-// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
-enum {
- EM_AMDGPU = 224, // AMD GPU architecture
-};
-
-// OS ABI identification.
-enum {
- ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
-};
-
-// AMDGPU OS ABI Version identification.
-enum {
- // ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
- // was never defined for V1.
- ELFABIVERSION_AMDGPU_HSA_V2 = 0,
- ELFABIVERSION_AMDGPU_HSA_V3 = 1,
- ELFABIVERSION_AMDGPU_HSA_V4 = 2
-};
-
-// AMDGPU specific e_flags.
-enum : unsigned {
- // Processor selection mask for EF_AMDGPU_MACH_* values.
- EF_AMDGPU_MACH = 0x0ff,
-
- // Not specified processor.
- EF_AMDGPU_MACH_NONE = 0x000,
-
- // AMDGCN-based processors.
- EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
- EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
- EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
- EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
- EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
- EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
- EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
- EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
- EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
- EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
- EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
- EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
- EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
- EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
- EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
- EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
- EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
- EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
- EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
- EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
- EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
- EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
- EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
- EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
- EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
- EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
- EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
- EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
-
- // First/last AMDGCN-based processors.
- EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX805,
-
- // Indicates if the "xnack" target feature is enabled for all code contained
- // in the object.
- //
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
- EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
- // Indicates if the trap handler is enabled for all code contained
- // in the object.
- //
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
- EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
-
- // Indicates if the "xnack" target feature is enabled for all code contained
- // in the object.
- //
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
- EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
- // Indicates if the "sramecc" target feature is enabled for all code
- // contained in the object.
- //
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
- EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
-
- // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
- //
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
- EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
- // XNACK is not supported.
- EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
- // XNACK is any/default/unspecified.
- EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
- // XNACK is off.
- EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
- // XNACK is on.
- EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
-
- // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
- //
- // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
- EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
- // SRAMECC is not supported.
- EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
- // SRAMECC is any/default/unspecified.
- EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
- // SRAMECC is off.
- EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
- // SRAMECC is on.
- EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
-};
-
-} // end namespace ELF
-
-// ELF Section Header Flag Enumeration Values.
-#define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS)
-#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
-#define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS)
-#define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS)
-
-//
-typedef enum {
- AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
- AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
- AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
- AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
- AMDGPU_HSA_SEGMENT_LAST,
-} amdgpu_hsa_elf_segment_t;
-
-// ELF Program Header Type Enumeration Values.
-#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
-#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
-#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
-#define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
-
-// ELF Symbol Type Enumeration Values.
-#define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0)
-#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
-#define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2)
-
-// ELF Symbol Binding Enumeration Values.
-#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
-
-// ELF Symbol Other Information Creation/Retrieval.
-#define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3)
-#define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4)
-#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
-
-typedef enum {
- AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
- AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
- AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
- AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
- AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
-} amdgpu_hsa_symbol_allocation_t;
-
-// ELF Symbol Allocation Enumeration Values.
-#define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
-#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
-#define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
-#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
-
-typedef enum {
- AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
- AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
- AMDGPU_HSA_SYMBOL_FLAG_LAST,
-} amdgpu_hsa_symbol_flag_t;
-
-// ELF Symbol Flag Enumeration Values.
-#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
-
-// AMD GPU Relocation Type Enumeration Values.
-#define R_AMDGPU_NONE 0
-#define R_AMDGPU_32_LOW 1
-#define R_AMDGPU_32_HIGH 2
-#define R_AMDGPU_64 3
-#define R_AMDGPU_INIT_SAMPLER 4
-#define R_AMDGPU_INIT_IMAGE 5
-#define R_AMDGPU_RELATIVE64 13
-
-// AMD GPU Note Type Enumeration Values.
-#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
-#define NT_AMD_HSA_HSAIL 2
-#define NT_AMD_HSA_ISA_VERSION 3
-#define NT_AMD_HSA_PRODUCER 4
-#define NT_AMD_HSA_PRODUCER_OPTIONS 5
-#define NT_AMD_HSA_EXTENSION 6
-#define NT_AMD_HSA_ISA_NAME 11
-#define NT_AMD_HSA_HLDEBUG_DEBUG 101
-#define NT_AMD_HSA_HLDEBUG_TARGET 102
-
-// AMD GPU Metadata Kind Enumeration Values.
-typedef uint16_t amdgpu_hsa_metadata_kind16_t;
-typedef enum {
- AMDGPU_HSA_METADATA_KIND_NONE = 0,
- AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
- AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
- AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
- AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
-} amdgpu_hsa_metadata_kind_t;
-
-// AMD GPU Sampler Coordinate Normalization Enumeration Values.
-typedef uint8_t amdgpu_hsa_sampler_coord8_t;
-typedef enum {
- AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
- AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
-} amdgpu_hsa_sampler_coord_t;
-
-// AMD GPU Sampler Filter Enumeration Values.
-typedef uint8_t amdgpu_hsa_sampler_filter8_t;
-typedef enum {
- AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
- AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
-} amdgpu_hsa_sampler_filter_t;
-
-// AMD GPU Sampler Addressing Enumeration Values.
-typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
-typedef enum {
- AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
- AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
- AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
- AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
- AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
-} amdgpu_hsa_sampler_addressing_t;
-
-// AMD GPU Sampler Descriptor.
-typedef struct amdgpu_hsa_sampler_descriptor_s {
- uint16_t size;
- amdgpu_hsa_metadata_kind16_t kind;
- amdgpu_hsa_sampler_coord8_t coord;
- amdgpu_hsa_sampler_filter8_t filter;
- amdgpu_hsa_sampler_addressing8_t addressing;
- uint8_t reserved1;
-} amdgpu_hsa_sampler_descriptor_t;
-
-// AMD GPU Image Geometry Enumeration Values.
-typedef uint8_t amdgpu_hsa_image_geometry8_t;
-typedef enum {
- AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
- AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
- AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
- AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
- AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
- AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
- AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
- AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
-} amdgpu_hsa_image_geometry_t;
-
-// AMD GPU Image Channel Order Enumeration Values.
-typedef uint8_t amdgpu_hsa_image_channel_order8_t;
-typedef enum {
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
- AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
-} amdgpu_hsa_image_channel_order_t;
-
-// AMD GPU Image Channel Type Enumeration Values.
-typedef uint8_t amdgpu_hsa_image_channel_type8_t;
-typedef enum {
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
- AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
-} amdgpu_hsa_image_channel_type_t;
-
-// AMD GPU Image Descriptor.
-typedef struct amdgpu_hsa_image_descriptor_s {
- uint16_t size;
- amdgpu_hsa_metadata_kind16_t kind;
- amdgpu_hsa_image_geometry8_t geometry;
- amdgpu_hsa_image_channel_order8_t channel_order;
- amdgpu_hsa_image_channel_type8_t channel_type;
- uint8_t reserved1;
- uint64_t width;
- uint64_t height;
- uint64_t depth;
- uint64_t array;
-} amdgpu_hsa_image_descriptor_t;
-
-typedef struct amdgpu_hsa_note_code_object_version_s {
- uint32_t major_version;
- uint32_t minor_version;
-} amdgpu_hsa_note_code_object_version_t;
-
-typedef struct amdgpu_hsa_note_hsail_s {
- uint32_t hsail_major_version;
- uint32_t hsail_minor_version;
- uint8_t profile;
- uint8_t machine_model;
- uint8_t default_float_round;
-} amdgpu_hsa_note_hsail_t;
-
-typedef struct amdgpu_hsa_note_isa_s {
- uint16_t vendor_name_size;
- uint16_t architecture_name_size;
- uint32_t major;
- uint32_t minor;
- uint32_t stepping;
- char vendor_and_architecture_name[1];
-} amdgpu_hsa_note_isa_t;
-
-typedef struct amdgpu_hsa_note_producer_s {
- uint16_t producer_name_size;
- uint16_t reserved;
- uint32_t producer_major_version;
- uint32_t producer_minor_version;
- char producer_name[1];
-} amdgpu_hsa_note_producer_t;
-
-typedef struct amdgpu_hsa_note_producer_options_s {
- uint16_t producer_options_size;
- char producer_options[1];
-} amdgpu_hsa_note_producer_options_t;
-
-typedef enum {
- AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
- AMDGPU_HSA_RODATA_GLOBAL_AGENT,
- AMDGPU_HSA_RODATA_READONLY_AGENT,
- AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
- AMDGPU_HSA_DATA_GLOBAL_AGENT,
- AMDGPU_HSA_DATA_READONLY_AGENT,
- AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
- AMDGPU_HSA_BSS_GLOBAL_AGENT,
- AMDGPU_HSA_BSS_READONLY_AGENT,
- AMDGPU_HSA_SECTION_LAST,
-} amdgpu_hsa_elf_section_t;
-
-#endif // AMD_HSA_ELF_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_kernel_code.h b/third_party/rocm/include/hsa/amd_hsa_kernel_code.h
deleted file mode 100644
index 901e49c..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_kernel_code.h
+++ /dev/null
@@ -1,269 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef AMD_HSA_KERNEL_CODE_H
-#define AMD_HSA_KERNEL_CODE_H
-
-#include "amd_hsa_common.h"
-#include "hsa.h"
-
-// AMD Kernel Code Version Enumeration Values.
-typedef uint32_t amd_kernel_code_version32_t;
-enum amd_kernel_code_version_t {
- AMD_KERNEL_CODE_VERSION_MAJOR = 1,
- AMD_KERNEL_CODE_VERSION_MINOR = 1
-};
-
-// AMD Machine Kind Enumeration Values.
-typedef uint16_t amd_machine_kind16_t;
-enum amd_machine_kind_t {
- AMD_MACHINE_KIND_UNDEFINED = 0,
- AMD_MACHINE_KIND_AMDGPU = 1
-};
-
-// AMD Machine Version.
-typedef uint16_t amd_machine_version16_t;
-
-// AMD Float Round Mode Enumeration Values.
-enum amd_float_round_mode_t {
- AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
- AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
- AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
- AMD_FLOAT_ROUND_MODE_ZERO = 3
-};
-
-// AMD Float Denorm Mode Enumeration Values.
-enum amd_float_denorm_mode_t {
- AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
- AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
- AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
- AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
-};
-
-// AMD Compute Program Resource Register One.
-typedef uint32_t amd_compute_pgm_rsrc_one32_t;
-enum amd_compute_pgm_rsrc_one_t {
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
-};
-
-// AMD System VGPR Workitem ID Enumeration Values.
-enum amd_system_vgpr_workitem_id_t {
- AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
- AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
- AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
- AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
-};
-
-// AMD Compute Program Resource Register Two.
-typedef uint32_t amd_compute_pgm_rsrc_two32_t;
-enum amd_compute_pgm_rsrc_two_t {
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
-};
-
-// AMD Element Byte Size Enumeration Values.
-enum amd_element_byte_size_t {
- AMD_ELEMENT_BYTE_SIZE_2 = 0,
- AMD_ELEMENT_BYTE_SIZE_4 = 1,
- AMD_ELEMENT_BYTE_SIZE_8 = 2,
- AMD_ELEMENT_BYTE_SIZE_16 = 3
-};
-
-// AMD Kernel Code Properties.
-typedef uint32_t amd_kernel_code_properties32_t;
-enum amd_kernel_code_properties_t {
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 10, 6),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
-};
-
-// AMD Power Of Two Enumeration Values.
-typedef uint8_t amd_powertwo8_t;
-enum amd_powertwo_t {
- AMD_POWERTWO_1 = 0,
- AMD_POWERTWO_2 = 1,
- AMD_POWERTWO_4 = 2,
- AMD_POWERTWO_8 = 3,
- AMD_POWERTWO_16 = 4,
- AMD_POWERTWO_32 = 5,
- AMD_POWERTWO_64 = 6,
- AMD_POWERTWO_128 = 7,
- AMD_POWERTWO_256 = 8
-};
-
-// AMD Enabled Control Directive Enumeration Values.
-typedef uint64_t amd_enabled_control_directive64_t;
-enum amd_enabled_control_directive_t {
- AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
- AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
- AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
- AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
- AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
- AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
- AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
- AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
- AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
-};
-
-// AMD Exception Kind Enumeration Values.
-typedef uint16_t amd_exception_kind16_t;
-enum amd_exception_kind_t {
- AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
- AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
- AMD_EXCEPTION_KIND_OVERFLOW = 4,
- AMD_EXCEPTION_KIND_UNDERFLOW = 8,
- AMD_EXCEPTION_KIND_INEXACT = 16
-};
-
-// AMD Control Directives.
-#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
-#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
-typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
- amd_enabled_control_directive64_t enabled_control_directives;
- uint16_t enable_break_exceptions;
- uint16_t enable_detect_exceptions;
- uint32_t max_dynamic_group_size;
- uint64_t max_flat_grid_size;
- uint32_t max_flat_workgroup_size;
- uint8_t required_dim;
- uint8_t reserved1[3];
- uint64_t required_grid_size[3];
- uint32_t required_workgroup_size[3];
- uint8_t reserved2[60];
-} amd_control_directives_t;
-
-// AMD Kernel Code.
-#define AMD_ISA_ALIGN_BYTES 256
-#define AMD_KERNEL_CODE_ALIGN_BYTES 64
-#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
-typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
- amd_kernel_code_version32_t amd_kernel_code_version_major;
- amd_kernel_code_version32_t amd_kernel_code_version_minor;
- amd_machine_kind16_t amd_machine_kind;
- amd_machine_version16_t amd_machine_version_major;
- amd_machine_version16_t amd_machine_version_minor;
- amd_machine_version16_t amd_machine_version_stepping;
- int64_t kernel_code_entry_byte_offset;
- int64_t kernel_code_prefetch_byte_offset;
- uint64_t kernel_code_prefetch_byte_size;
- uint64_t max_scratch_backing_memory_byte_size;
- amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
- amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
- amd_kernel_code_properties32_t kernel_code_properties;
- uint32_t workitem_private_segment_byte_size;
- uint32_t workgroup_group_segment_byte_size;
- uint32_t gds_segment_byte_size;
- uint64_t kernarg_segment_byte_size;
- uint32_t workgroup_fbarrier_count;
- uint16_t wavefront_sgpr_count;
- uint16_t workitem_vgpr_count;
- uint16_t reserved_vgpr_first;
- uint16_t reserved_vgpr_count;
- uint16_t reserved_sgpr_first;
- uint16_t reserved_sgpr_count;
- uint16_t debug_wavefront_private_segment_offset_sgpr;
- uint16_t debug_private_segment_buffer_sgpr;
- amd_powertwo8_t kernarg_segment_alignment;
- amd_powertwo8_t group_segment_alignment;
- amd_powertwo8_t private_segment_alignment;
- amd_powertwo8_t wavefront_size;
- int32_t call_convention;
- uint8_t reserved1[12];
- uint64_t runtime_loader_kernel_symbol;
- amd_control_directives_t control_directives;
-} amd_kernel_code_t;
-
-// TODO: this struct should be completely gone once debugger designs/implements
-// Debugger APIs.
-typedef struct amd_runtime_loader_debug_info_s {
- const void* elf_raw;
- size_t elf_size;
- const char *kernel_name;
- const void *owning_segment;
-} amd_runtime_loader_debug_info_t;
-
-#endif // AMD_HSA_KERNEL_CODE_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_queue.h b/third_party/rocm/include/hsa/amd_hsa_queue.h
deleted file mode 100644
index 8675ec4..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_queue.h
+++ /dev/null
@@ -1,87 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef AMD_HSA_QUEUE_H
-#define AMD_HSA_QUEUE_H
-
-#include "amd_hsa_common.h"
-#include "hsa.h"
-
-// AMD Queue Properties.
-typedef uint32_t amd_queue_properties32_t;
-enum amd_queue_properties_t {
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
- AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
-};
-
-// AMD Queue.
-#define AMD_QUEUE_ALIGN_BYTES 64
-#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
-typedef struct AMD_QUEUE_ALIGN amd_queue_s {
- hsa_queue_t hsa_queue;
- uint32_t reserved1[4];
- volatile uint64_t write_dispatch_id;
- uint32_t group_segment_aperture_base_hi;
- uint32_t private_segment_aperture_base_hi;
- uint32_t max_cu_id;
- uint32_t max_wave_id;
- volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
- volatile uint32_t legacy_doorbell_lock;
- uint32_t reserved2[9];
- volatile uint64_t read_dispatch_id;
- uint32_t read_dispatch_id_field_base_byte_offset;
- uint32_t compute_tmpring_size;
- uint32_t scratch_resource_descriptor[4];
- uint64_t scratch_backing_memory_location;
- uint64_t scratch_backing_memory_byte_size;
- uint32_t scratch_wave64_lane_byte_size;
- amd_queue_properties32_t queue_properties;
- uint32_t reserved3[2];
- hsa_signal_t queue_inactive_signal;
- uint32_t reserved4[14];
-} amd_queue_t;
-
-#endif // AMD_HSA_QUEUE_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_signal.h b/third_party/rocm/include/hsa/amd_hsa_signal.h
deleted file mode 100644
index f9d721f..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_signal.h
+++ /dev/null
@@ -1,80 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef AMD_HSA_SIGNAL_H
-#define AMD_HSA_SIGNAL_H
-
-#include "amd_hsa_common.h"
-#include "amd_hsa_queue.h"
-
-// AMD Signal Kind Enumeration Values.
-typedef int64_t amd_signal_kind64_t;
-enum amd_signal_kind_t {
- AMD_SIGNAL_KIND_INVALID = 0,
- AMD_SIGNAL_KIND_USER = 1,
- AMD_SIGNAL_KIND_DOORBELL = -1,
- AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
-};
-
-// AMD Signal.
-#define AMD_SIGNAL_ALIGN_BYTES 64
-#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
-typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
- amd_signal_kind64_t kind;
- union {
- volatile int64_t value;
- volatile uint32_t* legacy_hardware_doorbell_ptr;
- volatile uint64_t* hardware_doorbell_ptr;
- };
- uint64_t event_mailbox_ptr;
- uint32_t event_id;
- uint32_t reserved1;
- uint64_t start_ts;
- uint64_t end_ts;
- union {
- amd_queue_t* queue_ptr;
- uint64_t reserved2;
- };
- uint32_t reserved3[2];
-} amd_signal_t;
-
-#endif // AMD_HSA_SIGNAL_H
diff --git a/third_party/rocm/include/hsa/hsa.h b/third_party/rocm/include/hsa/hsa.h
deleted file mode 100644
index d8fdd47..0000000
--- a/third_party/rocm/include/hsa/hsa.h
+++ /dev/null
@@ -1,5660 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_RUNTIME_INC_HSA_H_
-#define HSA_RUNTIME_INC_HSA_H_
-
-#include <stddef.h> /* size_t */
-#include <stdint.h> /* uintXX_t */
-
-#ifndef __cplusplus
-#include <stdbool.h> /* bool */
-#endif /* __cplusplus */
-
-// Placeholder for calling convention and import/export macros
-#ifndef HSA_CALL
-#define HSA_CALL
-#endif
-
-#ifndef HSA_EXPORT_DECORATOR
-#ifdef __GNUC__
-#define HSA_EXPORT_DECORATOR __attribute__ ((visibility ("default")))
-#else
-#define HSA_EXPORT_DECORATOR
-#endif
-#endif
-#define HSA_API_EXPORT HSA_EXPORT_DECORATOR HSA_CALL
-#define HSA_API_IMPORT HSA_CALL
-
-#if !defined(HSA_API) && defined(HSA_EXPORT)
-#define HSA_API HSA_API_EXPORT
-#else
-#define HSA_API HSA_API_IMPORT
-#endif
-
-// Detect and set large model builds.
-#undef HSA_LARGE_MODEL
-#if defined(__LP64__) || defined(_M_X64)
-#define HSA_LARGE_MODEL
-#endif
-
-// Try to detect CPU endianness
-#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU)
-#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
- defined(_M_X64)
-#define LITTLEENDIAN_CPU
-#endif
-#endif
-
-#undef HSA_LITTLE_ENDIAN
-#if defined(LITTLEENDIAN_CPU)
-#define HSA_LITTLE_ENDIAN
-#elif defined(BIGENDIAN_CPU)
-#else
-#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
-#endif
-
-#ifndef HSA_DEPRECATED
-#define HSA_DEPRECATED
-//#ifdef __GNUC__
-//#define HSA_DEPRECATED __attribute__((deprecated))
-//#else
-//#define HSA_DEPRECATED __declspec(deprecated)
-//#endif
-#endif
-
-#define HSA_VERSION_1_0 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/** \defgroup status Runtime Notifications
- * @{
- */
-
-/**
- * @brief Status codes.
- */
-typedef enum {
- /**
- * The function has been executed successfully.
- */
- HSA_STATUS_SUCCESS = 0x0,
- /**
- * A traversal over a list of elements has been interrupted by the
- * application before completing.
- */
- HSA_STATUS_INFO_BREAK = 0x1,
- /**
- * A generic error has occurred.
- */
- HSA_STATUS_ERROR = 0x1000,
- /**
- * One of the actual arguments does not meet a precondition stated in the
- * documentation of the corresponding formal argument.
- */
- HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001,
- /**
- * The requested queue creation is not valid.
- */
- HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002,
- /**
- * The requested allocation is not valid.
- */
- HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003,
- /**
- * The agent is invalid.
- */
- HSA_STATUS_ERROR_INVALID_AGENT = 0x1004,
- /**
- * The memory region is invalid.
- */
- HSA_STATUS_ERROR_INVALID_REGION = 0x1005,
- /**
- * The signal is invalid.
- */
- HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006,
- /**
- * The queue is invalid.
- */
- HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007,
- /**
- * The HSA runtime failed to allocate the necessary resources. This error
- * may also occur when the HSA runtime needs to spawn threads or create
- * internal OS-specific events.
- */
- HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008,
- /**
- * The AQL packet is malformed.
- */
- HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009,
- /**
- * An error has been detected while releasing a resource.
- */
- HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A,
- /**
- * An API other than ::hsa_init has been invoked while the reference count
- * of the HSA runtime is 0.
- */
- HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B,
- /**
- * The maximum reference count for the object has been reached.
- */
- HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C,
- /**
- * The arguments passed to a functions are not compatible.
- */
- HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D,
- /**
- * The index is invalid.
- */
- HSA_STATUS_ERROR_INVALID_INDEX = 0x100E,
- /**
- * The instruction set architecture is invalid.
- */
- HSA_STATUS_ERROR_INVALID_ISA = 0x100F,
- /**
- * The instruction set architecture name is invalid.
- */
- HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017,
- /**
- * The code object is invalid.
- */
- HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010,
- /**
- * The executable is invalid.
- */
- HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011,
- /**
- * The executable is frozen.
- */
- HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012,
- /**
- * There is no symbol with the given name.
- */
- HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013,
- /**
- * The variable is already defined.
- */
- HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014,
- /**
- * The variable is undefined.
- */
- HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015,
- /**
- * An HSAIL operation resulted in a hardware exception.
- */
- HSA_STATUS_ERROR_EXCEPTION = 0x1016,
- /**
- * The code object symbol is invalid.
- */
- HSA_STATUS_ERROR_INVALID_CODE_SYMBOL = 0x1018,
- /**
- * The executable symbol is invalid.
- */
- HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL = 0x1019,
- /**
- * The file descriptor is invalid.
- */
- HSA_STATUS_ERROR_INVALID_FILE = 0x1020,
- /**
- * The code object reader is invalid.
- */
- HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER = 0x1021,
- /**
- * The cache is invalid.
- */
- HSA_STATUS_ERROR_INVALID_CACHE = 0x1022,
- /**
- * The wavefront is invalid.
- */
- HSA_STATUS_ERROR_INVALID_WAVEFRONT = 0x1023,
- /**
- * The signal group is invalid.
- */
- HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP = 0x1024,
- /**
- * The HSA runtime is not in the configuration state.
- */
- HSA_STATUS_ERROR_INVALID_RUNTIME_STATE = 0x1025,
- /**
- * The queue received an error that may require process termination.
- */
- HSA_STATUS_ERROR_FATAL = 0x1026
-} hsa_status_t;
-
-/**
- * @brief Query additional information about a status code.
- *
- * @param[in] status Status code.
- *
- * @param[out] status_string A NUL-terminated string that describes the error
- * status.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p status is an invalid
- * status code, or @p status_string is NULL.
- */
-hsa_status_t HSA_API hsa_status_string(
- hsa_status_t status,
- const char ** status_string);
-
-/** @} */
-
-/** \defgroup common Common Definitions
- * @{
- */
-
-/**
- * @brief Three-dimensional coordinate.
- */
-typedef struct hsa_dim3_s {
- /**
- * X dimension.
- */
- uint32_t x;
-
- /**
- * Y dimension.
- */
- uint32_t y;
-
- /**
- * Z dimension.
- */
- uint32_t z;
-} hsa_dim3_t;
-
-/**
- * @brief Access permissions.
- */
-typedef enum {
- /**
- * Read-only access.
- */
- HSA_ACCESS_PERMISSION_RO = 1,
- /**
- * Write-only access.
- */
- HSA_ACCESS_PERMISSION_WO = 2,
- /**
- * Read and write access.
- */
- HSA_ACCESS_PERMISSION_RW = 3
-} hsa_access_permission_t;
-
-/**
- * @brief POSIX file descriptor.
- */
-typedef int hsa_file_t;
-
-/** @} **/
-
-
-/** \defgroup initshutdown Initialization and Shut Down
- * @{
- */
-
-/**
- * @brief Initialize the HSA runtime.
- *
- * @details Initializes the HSA runtime if it is not already initialized, and
- * increases the reference counter associated with the HSA runtime for the
- * current process. Invocation of any HSA function other than ::hsa_init results
- * in undefined behavior if the current HSA runtime reference counter is less
- * than one.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_REFCOUNT_OVERFLOW The HSA runtime reference
- * count reaches INT32_MAX.
- */
-hsa_status_t HSA_API hsa_init();
-
-/**
- * @brief Shut down the HSA runtime.
- *
- * @details Decreases the reference count of the HSA runtime instance. When the
- * reference count reaches 0, the HSA runtime is no longer considered valid
- * but the application might call ::hsa_init to initialize the HSA runtime
- * again.
- *
- * Once the reference count of the HSA runtime reaches 0, all the resources
- * associated with it (queues, signals, agent information, etc.) are
- * considered invalid and any attempt to reference them in subsequent API calls
- * results in undefined behavior. When the reference count reaches 0, the HSA
- * runtime may release resources associated with it.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- */
-hsa_status_t HSA_API hsa_shut_down();
-
-/** @} **/
-
-/** \defgroup agentinfo System and Agent Information
- * @{
- */
-
-/**
- * @brief Endianness. A convention used to interpret the bytes making up a data
- * word.
- */
-typedef enum {
- /**
- * The least significant byte is stored in the smallest address.
- */
- HSA_ENDIANNESS_LITTLE = 0,
- /**
- * The most significant byte is stored in the smallest address.
- */
- HSA_ENDIANNESS_BIG = 1
-} hsa_endianness_t;
-
-/**
- * @brief Machine model. A machine model determines the size of certain data
- * types in HSA runtime and an agent.
- */
-typedef enum {
- /**
- * Small machine model. Addresses use 32 bits.
- */
- HSA_MACHINE_MODEL_SMALL = 0,
- /**
- * Large machine model. Addresses use 64 bits.
- */
- HSA_MACHINE_MODEL_LARGE = 1
-} hsa_machine_model_t;
-
-/**
- * @brief Profile. A profile indicates a particular level of feature
- * support. For example, in the base profile the application must use the HSA
- * runtime allocator to reserve shared virtual memory, while in the full profile
- * any host pointer can be shared across all the agents.
- */
-typedef enum {
- /**
- * Base profile.
- */
- HSA_PROFILE_BASE = 0,
- /**
- * Full profile.
- */
- HSA_PROFILE_FULL = 1
-} hsa_profile_t;
-
-/**
- * @brief System attributes.
- */
-typedef enum {
- /**
- * Major version of the HSA runtime specification supported by the
- * implementation. The type of this attribute is uint16_t.
- */
- HSA_SYSTEM_INFO_VERSION_MAJOR = 0,
- /**
- * Minor version of the HSA runtime specification supported by the
- * implementation. The type of this attribute is uint16_t.
- */
- HSA_SYSTEM_INFO_VERSION_MINOR = 1,
- /**
- * Current timestamp. The value of this attribute monotonically increases at a
- * constant rate. The type of this attribute is uint64_t.
- */
- HSA_SYSTEM_INFO_TIMESTAMP = 2,
- /**
- * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is
- * in the range 1-400MHz. The type of this attribute is uint64_t.
- */
- HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3,
- /**
- * Maximum duration of a signal wait operation. Expressed as a count based on
- * the timestamp frequency. The type of this attribute is uint64_t.
- */
- HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4,
- /**
- * Endianness of the system. The type of this attribute is ::hsa_endianness_t.
- */
- HSA_SYSTEM_INFO_ENDIANNESS = 5,
- /**
- * Machine model supported by the HSA runtime. The type of this attribute is
- * ::hsa_machine_model_t.
- */
- HSA_SYSTEM_INFO_MACHINE_MODEL = 6,
- /**
- * Bit-mask indicating which extensions are supported by the
- * implementation. An extension with an ID of @p i is supported if the bit at
- * position @p i is set. The type of this attribute is uint8_t[128].
- */
- HSA_SYSTEM_INFO_EXTENSIONS = 7,
- /**
- * String containing the ROCr build identifier.
- */
- HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
-} hsa_system_info_t;
-
-/**
- * @brief Get the current value of a system attribute.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * system attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_system_get_info(
- hsa_system_info_t attribute,
- void* value);
-
-/**
- * @brief HSA extensions.
- */
-typedef enum {
- /**
- * Finalizer extension.
- */
- HSA_EXTENSION_FINALIZER = 0,
- /**
- * Images extension.
- */
- HSA_EXTENSION_IMAGES = 1,
-
- /**
- * Performance counter extension.
- */
- HSA_EXTENSION_PERFORMANCE_COUNTERS = 2,
-
- /**
- * Profiling events extension.
- */
- HSA_EXTENSION_PROFILING_EVENTS = 3,
- /**
- * Extension count.
- */
- HSA_EXTENSION_STD_LAST = 3,
- /**
- * First AMD extension number.
- */
- HSA_AMD_FIRST_EXTENSION = 0x200,
- /**
- * Profiler extension.
- */
- HSA_EXTENSION_AMD_PROFILER = 0x200,
- /**
- * Loader extension.
- */
- HSA_EXTENSION_AMD_LOADER = 0x201,
- /**
- * AqlProfile extension.
- */
- HSA_EXTENSION_AMD_AQLPROFILE = 0x202,
- /**
- * Last AMD extension.
- */
- HSA_AMD_LAST_EXTENSION = 0x202
-} hsa_extension_t;
-
-/**
- * @brief Query the name of a given extension.
- *
- * @param[in] extension Extension identifier. If the extension is not supported
- * by the implementation (see ::HSA_SYSTEM_INFO_EXTENSIONS), the behavior
- * is undefined.
- *
- * @param[out] name Pointer to a memory location where the HSA runtime stores
- * the extension name. The extension name is a NUL-terminated string.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p name is NULL.
- */
-hsa_status_t HSA_API hsa_extension_get_name(
- uint16_t extension,
- const char **name);
-
-/**
- * @deprecated
- *
- * @brief Query if a given version of an extension is supported by the HSA
- * implementation.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number.
- *
- * @param[in] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p result is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_system_extension_supported(
- uint16_t extension,
- uint16_t version_major,
- uint16_t version_minor,
- bool* result);
-
-/**
- * @brief Query if a given version of an extension is supported by the HSA
- * implementation. All minor versions from 0 up to the returned @p version_minor
- * must be supported by the implementation.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number.
- *
- * @param[out] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p version_minor is NULL, or @p result is NULL.
- */
-hsa_status_t HSA_API hsa_system_major_extension_supported(
- uint16_t extension,
- uint16_t version_major,
- uint16_t *version_minor,
- bool* result);
-
-
-/**
- * @deprecated
- *
- * @brief Retrieve the function pointers corresponding to a given version of an
- * extension. Portable applications are expected to invoke the extension API
- * using the returned function pointers
- *
- * @details The application is responsible for verifying that the given version
- * of the extension is supported by the HSA implementation (see
- * ::hsa_system_extension_supported). If the given combination of extension,
- * major version, and minor version is not supported by the implementation, the
- * behavior is undefined.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number for which to retrieve the
- * function pointer table.
- *
- * @param[in] version_minor Minor version number for which to retrieve the
- * function pointer table.
- *
- * @param[out] table Pointer to an application-allocated function pointer table
- * that is populated by the HSA runtime. Must not be NULL. The memory associated
- * with table can be reused or freed after the function returns.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p table is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_system_get_extension_table(
- uint16_t extension,
- uint16_t version_major,
- uint16_t version_minor,
- void *table);
-
-/**
- * @brief Retrieve the function pointers corresponding to a given major version
- * of an extension. Portable applications are expected to invoke the extension
- * API using the returned function pointers.
- *
- * @details The application is responsible for verifying that the given major
- * version of the extension is supported by the HSA implementation (see
- * ::hsa_system_major_extension_supported). If the given combination of extension
- * and major version is not supported by the implementation, the behavior is
- * undefined. Additionally if the length doesn't allow space for a full minor
- * version, it is implementation defined if only some of the function pointers for
- * that minor version get written.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number for which to retrieve the
- * function pointer table.
- *
- * @param[in] table_length Size in bytes of the function pointer table to be
- * populated. The implementation will not write more than this many bytes to the
- * table.
- *
- * @param[out] table Pointer to an application-allocated function pointer table
- * that is populated by the HSA runtime. Must not be NULL. The memory associated
- * with table can be reused or freed after the function returns.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p table is NULL.
- */
-hsa_status_t HSA_API hsa_system_get_major_extension_table(
- uint16_t extension,
- uint16_t version_major,
- size_t table_length,
- void *table);
-
-/**
- * @brief Struct containing an opaque handle to an agent, a device that participates in
- * the HSA memory model. An agent can submit AQL packets for execution, and
- * may also accept AQL packets for execution (agent dispatch packets or kernel
- * dispatch packets launching HSAIL-derived binaries).
- */
-typedef struct hsa_agent_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_agent_t;
-
-/**
- * @brief Agent features.
- */
-typedef enum {
- /**
- * The agent supports AQL packets of kernel dispatch type. If this
- * feature is enabled, the agent is also a kernel agent.
- */
- HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1,
- /**
- * The agent supports AQL packets of agent dispatch type.
- */
- HSA_AGENT_FEATURE_AGENT_DISPATCH = 2
-} hsa_agent_feature_t;
-
-/**
- * @brief Hardware device type.
- */
-typedef enum {
- /**
- * CPU device.
- */
- HSA_DEVICE_TYPE_CPU = 0,
- /**
- * GPU device.
- */
- HSA_DEVICE_TYPE_GPU = 1,
- /**
- * DSP device.
- */
- HSA_DEVICE_TYPE_DSP = 2
-} hsa_device_type_t;
-
-/**
- * @brief Default floating-point rounding mode.
- */
-typedef enum {
- /**
- * Use a default floating-point rounding mode specified elsewhere.
- */
- HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0,
- /**
- * Operations that specify the default floating-point mode are rounded to zero
- * by default.
- */
- HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1,
- /**
- * Operations that specify the default floating-point mode are rounded to the
- * nearest representable number and that ties should be broken by selecting
- * the value with an even least significant bit.
- */
- HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2
-} hsa_default_float_rounding_mode_t;
-
-/**
- * @brief Agent attributes.
- */
-typedef enum {
- /**
- * Agent name. The type of this attribute is a NUL-terminated char[64]. The
- * name must be at most 63 characters long (not including the NUL terminator)
- * and all array elements not used for the name must be NUL.
- */
- HSA_AGENT_INFO_NAME = 0,
- /**
- * Name of vendor. The type of this attribute is a NUL-terminated char[64].
- * The name must be at most 63 characters long (not including the NUL
- * terminator) and all array elements not used for the name must be NUL.
- */
- HSA_AGENT_INFO_VENDOR_NAME = 1,
- /**
- * Agent capability. The type of this attribute is ::hsa_agent_feature_t.
- */
- HSA_AGENT_INFO_FEATURE = 2,
- /**
- * @deprecated Query ::HSA_ISA_INFO_MACHINE_MODELS for a given intruction set
- * architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Machine model supported by the agent. The type of this attribute is
- * ::hsa_machine_model_t.
- */
- HSA_AGENT_INFO_MACHINE_MODEL = 3,
- /**
- * @deprecated Query ::HSA_ISA_INFO_PROFILES for a given intruction set
- * architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Profile supported by the agent. The type of this attribute is
- * ::hsa_profile_t.
- */
- HSA_AGENT_INFO_PROFILE = 4,
- /**
- * @deprecated Query ::HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES for a given
- * intruction set architecture supported by the agent instead. If more than
- * one ISA is supported by the agent, the returned value corresponds to the
- * first ISA enumerated by ::hsa_agent_iterate_isas.
- *
- * Default floating-point rounding mode. The type of this attribute is
- * ::hsa_default_float_rounding_mode_t, but the value
- * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed.
- */
- HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5,
- /**
- * @deprecated Query ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES
- * for a given intruction set architecture supported by the agent instead. If
- * more than one ISA is supported by the agent, the returned value corresponds
- * to the first ISA enumerated by ::hsa_agent_iterate_isas.
- *
- * A bit-mask of ::hsa_default_float_rounding_mode_t values, representing the
- * default floating-point rounding modes supported by the agent in the Base
- * profile. The type of this attribute is uint32_t. The default floating-point
- * rounding mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not
- * be set.
- */
- HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23,
- /**
- * @deprecated Query ::HSA_ISA_INFO_FAST_F16_OPERATION for a given intruction
- * set architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Flag indicating that the f16 HSAIL operation is at least as fast as the
- * f32 operation in the current agent. The value of this attribute is
- * undefined if the agent is not a kernel agent. The type of this
- * attribute is bool.
- */
- HSA_AGENT_INFO_FAST_F16_OPERATION = 24,
- /**
- * @deprecated Query ::HSA_WAVEFRONT_INFO_SIZE for a given wavefront and
- * intruction set architecture supported by the agent instead. If more than
- * one ISA is supported by the agent, the returned value corresponds to the
- * first ISA enumerated by ::hsa_agent_iterate_isas and the first wavefront
- * enumerated by ::hsa_isa_iterate_wavefronts for that ISA.
- *
- * Number of work-items in a wavefront. Must be a power of 2 in the range
- * [1,256]. The value of this attribute is undefined if the agent is not
- * a kernel agent. The type of this attribute is uint32_t.
- */
- HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
- /**
- * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_DIM for a given intruction
- * set architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Maximum number of work-items of each dimension of a work-group. Each
- * maximum must be greater than 0. No maximum can exceed the value of
- * ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is
- * undefined if the agent is not a kernel agent. The type of this
- * attribute is uint16_t[3].
- */
- HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
- /**
- * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE for a given intruction
- * set architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Maximum total number of work-items in a work-group. The value of this
- * attribute is undefined if the agent is not a kernel agent. The type
- * of this attribute is uint32_t.
- */
- HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8,
- /**
- * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_DIM for a given intruction set
- * architecture supported by the agent instead.
- *
- * Maximum number of work-items of each dimension of a grid. Each maximum must
- * be greater than 0, and must not be smaller than the corresponding value in
- * ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
- * ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined
- * if the agent is not a kernel agent. The type of this attribute is
- * ::hsa_dim3_t.
- */
- HSA_AGENT_INFO_GRID_MAX_DIM = 9,
- /**
- * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_SIZE for a given intruction set
- * architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Maximum total number of work-items in a grid. The value of this attribute
- * is undefined if the agent is not a kernel agent. The type of this
- * attribute is uint32_t.
- */
- HSA_AGENT_INFO_GRID_MAX_SIZE = 10,
- /**
- * @deprecated Query ::HSA_ISA_INFO_FBARRIER_MAX_SIZE for a given intruction
- * set architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Maximum number of fbarriers per work-group. Must be at least 32. The value
- * of this attribute is undefined if the agent is not a kernel agent. The
- * type of this attribute is uint32_t.
- */
- HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11,
- /**
- * @deprecated The maximum number of queues is not statically determined.
- *
- * Maximum number of queues that can be active (created but not destroyed) at
- * one time in the agent. The type of this attribute is uint32_t.
- */
- HSA_AGENT_INFO_QUEUES_MAX = 12,
- /**
- * Minimum number of packets that a queue created in the agent
- * can hold. Must be a power of 2 greater than 0. Must not exceed
- * the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this
- * attribute is uint32_t.
- */
- HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13,
- /**
- * Maximum number of packets that a queue created in the agent can
- * hold. Must be a power of 2 greater than 0. The type of this attribute
- * is uint32_t.
- */
- HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14,
- /**
- * Type of a queue created in the agent. The type of this attribute is
- * ::hsa_queue_type32_t.
- */
- HSA_AGENT_INFO_QUEUE_TYPE = 15,
- /**
- * @deprecated NUMA information is not exposed anywhere else in the API.
- *
- * Identifier of the NUMA node associated with the agent. The type of this
- * attribute is uint32_t.
- */
- HSA_AGENT_INFO_NODE = 16,
- /**
- * Type of hardware device associated with the agent. The type of this
- * attribute is ::hsa_device_type_t.
- */
- HSA_AGENT_INFO_DEVICE = 17,
- /**
- * @deprecated Query ::hsa_agent_iterate_caches to retrieve information about
- * the caches present in a given agent.
- *
- * Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size
- * of 0 for a particular level indicates that there is no cache information
- * for that level. The type of this attribute is uint32_t[4].
- */
- HSA_AGENT_INFO_CACHE_SIZE = 18,
- /**
- * @deprecated An agent may support multiple instruction set
- * architectures. See ::hsa_agent_iterate_isas. If more than one ISA is
- * supported by the agent, the returned value corresponds to the first ISA
- * enumerated by ::hsa_agent_iterate_isas.
- *
- * Instruction set architecture of the agent. The type of this attribute
- * is ::hsa_isa_t.
- */
- HSA_AGENT_INFO_ISA = 19,
- /**
- * Bit-mask indicating which extensions are supported by the agent. An
- * extension with an ID of @p i is supported if the bit at position @p i is
- * set. The type of this attribute is uint8_t[128].
- */
- HSA_AGENT_INFO_EXTENSIONS = 20,
- /**
- * Major version of the HSA runtime specification supported by the
- * agent. The type of this attribute is uint16_t.
- */
- HSA_AGENT_INFO_VERSION_MAJOR = 21,
- /**
- * Minor version of the HSA runtime specification supported by the
- * agent. The type of this attribute is uint16_t.
- */
- HSA_AGENT_INFO_VERSION_MINOR = 22
-
-} hsa_agent_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given agent.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * agent attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_agent_get_info(
- hsa_agent_t agent,
- hsa_agent_info_t attribute,
- void* value);
-
-/**
- * @brief Iterate over the available agents, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] callback Callback to be invoked once per agent. The HSA
- * runtime passes two arguments to the callback: the agent and the
- * application data. If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_iterate_agents returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
-*/
-hsa_status_t HSA_API hsa_iterate_agents(
- hsa_status_t (*callback)(hsa_agent_t agent, void* data),
- void* data);
-
-/*
-
-// If we do not know the size of an attribute, we need to query it first
-// Note: this API will not be in the spec unless needed
-hsa_status_t HSA_API hsa_agent_get_info_size(
- hsa_agent_t agent,
- hsa_agent_info_t attribute,
- size_t* size);
-
-// Set the value of an agents attribute
-// Note: this API will not be in the spec unless needed
-hsa_status_t HSA_API hsa_agent_set_info(
- hsa_agent_t agent,
- hsa_agent_info_t attribute,
- void* value);
-
-*/
-
-/**
- * @brief Exception policies applied in the presence of hardware exceptions.
- */
-typedef enum {
- /**
- * If a hardware exception is detected, a work-item signals an exception.
- */
- HSA_EXCEPTION_POLICY_BREAK = 1,
- /**
- * If a hardware exception is detected, a hardware status bit is set.
- */
- HSA_EXCEPTION_POLICY_DETECT = 2
-} hsa_exception_policy_t;
-
-/**
- * @deprecated Use ::hsa_isa_get_exception_policies for a given intruction set
- * architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, this function uses the first value returned by
- * ::hsa_agent_iterate_isas.
- *
- * @brief Retrieve the exception policy support for a given combination of
- * agent and profile
- *
- * @param[in] agent Agent.
- *
- * @param[in] profile Profile.
- *
- * @param[out] mask Pointer to a memory location where the HSA runtime stores a
- * mask of ::hsa_exception_policy_t values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
- * profile, or @p mask is NULL.
- *
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_get_exception_policies(
- hsa_agent_t agent,
- hsa_profile_t profile,
- uint16_t *mask);
-
-/**
- * @brief Cache handle.
- */
-typedef struct hsa_cache_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_cache_t;
-
-/**
- * @brief Cache attributes.
- */
-typedef enum {
- /**
- * The length of the cache name in bytes, not including the NUL terminator.
- * The type of this attribute is uint32_t.
- */
- HSA_CACHE_INFO_NAME_LENGTH = 0,
- /**
- * Human-readable description. The type of this attribute is a NUL-terminated
- * character array with the length equal to the value of
- * ::HSA_CACHE_INFO_NAME_LENGTH attribute.
- */
- HSA_CACHE_INFO_NAME = 1,
- /**
- * Cache level. A L1 cache must return a value of 1, a L2 must return a value
- * of 2, and so on. The type of this attribute is uint8_t.
- */
- HSA_CACHE_INFO_LEVEL = 2,
- /**
- * Cache size, in bytes. A value of 0 indicates that there is no size
- * information available. The type of this attribute is uint32_t.
- */
- HSA_CACHE_INFO_SIZE = 3
-} hsa_cache_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given cache object.
- *
- * @param[in] cache Cache.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CACHE The cache is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * instruction set architecture attribute, or @p value is
- * NULL.
- */
-hsa_status_t HSA_API hsa_cache_get_info(
- hsa_cache_t cache,
- hsa_cache_info_t attribute,
- void* value);
-
-/**
- * @brief Iterate over the memory caches of a given agent, and
- * invoke an application-defined callback on every iteration.
- *
- * @details Caches are visited in ascending order according to the value of the
- * ::HSA_CACHE_INFO_LEVEL attribute.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked once per cache that is present in
- * the agent. The HSA runtime passes two arguments to the callback: the cache
- * and the application data. If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * that value is returned.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_agent_iterate_caches(
- hsa_agent_t agent,
- hsa_status_t (*callback)(hsa_cache_t cache, void* data),
- void* data);
-
-/**
- * @deprecated
- *
- * @brief Query if a given version of an extension is supported by an agent
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] agent Agent.
- *
- * @param[in] version_major Major version number.
- *
- * @param[in] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise. The result must be false if
- * ::hsa_system_extension_supported returns false for the same extension
- * version.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p result is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_extension_supported(
- uint16_t extension,
- hsa_agent_t agent,
- uint16_t version_major,
- uint16_t version_minor,
- bool* result);
-
-/**
- * @brief Query if a given version of an extension is supported by an agent. All
- * minor versions from 0 up to the returned @p version_minor must be supported.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] agent Agent.
- *
- * @param[in] version_major Major version number.
- *
- * @param[out] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise. The result must be false if
- * ::hsa_system_extension_supported returns false for the same extension
- * version.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p version_minor is NULL, or @p result is NULL.
- */
-hsa_status_t HSA_API hsa_agent_major_extension_supported(
- uint16_t extension,
- hsa_agent_t agent,
- uint16_t version_major,
- uint16_t *version_minor,
- bool* result);
-
-
-/** @} */
-
-
-/** \defgroup signals Signals
- * @{
- */
-
-/**
- * @brief Signal handle.
- */
-typedef struct hsa_signal_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal. The value 0 is reserved.
- */
- uint64_t handle;
-} hsa_signal_t;
-
-/**
- * @brief Signal value. The value occupies 32 bits in small machine mode, and 64
- * bits in large machine mode.
- */
-#ifdef HSA_LARGE_MODEL
- typedef int64_t hsa_signal_value_t;
-#else
- typedef int32_t hsa_signal_value_t;
-#endif
-
-/**
- * @brief Create a signal.
- *
- * @param[in] initial_value Initial value of the signal.
- *
- * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
- * any agent might wait on the signal.
- *
- * @param[in] consumers List of agents that might consume (wait on) the
- * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
- * HSA runtime might use the list to optimize the handling of the signal
- * object. If an agent not listed in @p consumers waits on the returned
- * signal, the behavior is undefined. The memory associated with @p consumers
- * can be reused or freed after the function returns.
- *
- * @param[out] signal Pointer to a memory location where the HSA runtime will
- * store the newly created signal handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
- * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
- * contains duplicates.
- */
-hsa_status_t HSA_API hsa_signal_create(
- hsa_signal_value_t initial_value,
- uint32_t num_consumers,
- const hsa_agent_t *consumers,
- hsa_signal_t *signal);
-
-/**
- * @brief Destroy a signal previous created by ::hsa_signal_create.
- *
- * @param[in] signal Signal.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The handle in @p signal is 0.
- */
-hsa_status_t HSA_API hsa_signal_destroy(
- hsa_signal_t signal);
-
-/**
- * @brief Atomically read the current value of a signal.
- *
- * @param[in] signal Signal.
- *
- * @return Value of the signal.
-*/
-hsa_signal_value_t HSA_API hsa_signal_load_scacquire(
- hsa_signal_t signal);
-
-/**
- * @copydoc hsa_signal_load_scacquire
- */
-hsa_signal_value_t HSA_API hsa_signal_load_relaxed(
- hsa_signal_t signal);
-
-/**
- * @deprecated Renamed as ::hsa_signal_load_scacquire.
- *
- * @copydoc hsa_signal_load_scacquire
-*/
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_load_acquire(
- hsa_signal_t signal);
-
-/**
- * @brief Atomically set the value of a signal.
- *
- * @details If the value of the signal is changed, all the agents waiting
- * on @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal.
- *
- * @param[in] value New signal value.
- */
-void HSA_API hsa_signal_store_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_store_relaxed
- */
-void HSA_API hsa_signal_store_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_store_screlease.
- *
- * @copydoc hsa_signal_store_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_store_release(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically set the value of a signal without necessarily notifying the
- * the agents waiting on it.
- *
- * @details The agents waiting on @p signal may not wake up even when the new
- * value satisfies their wait condition. If the application wants to update the
- * signal and there is no need to notify any agent, invoking this function can
- * be more efficient than calling the non-silent counterpart.
- *
- * @param[in] signal Signal.
- *
- * @param[in] value New signal value.
- */
-void HSA_API hsa_signal_silent_store_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_silent_store_relaxed
- */
-void HSA_API hsa_signal_silent_store_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically set the value of a signal and return its previous value.
- *
- * @details If the value of the signal is changed, all the agents waiting
- * on @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value New value.
- *
- * @return Value of the signal prior to the exchange.
- *
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_scacq_screl(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_exchange_scacq_screl.
- *
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acq_rel(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_scacquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_exchange_scacquire.
- *
- * @copydoc hsa_signal_exchange_scacquire
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-/**
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_exchange_screlease.
- *
- * @copydoc hsa_signal_exchange_screlease
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_release(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically set the value of a signal if the observed value is equal to
- * the expected value. The observed value is returned regardless of whether the
- * replacement was done.
- *
- * @details If the value of the signal is changed, all the agents waiting
- * on @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue
- * doorbell signal, the behavior is undefined.
- *
- * @param[in] expected Value to compare with.
- *
- * @param[in] value New value.
- *
- * @return Observed value of the signal.
- *
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_scacq_screl(
- hsa_signal_t signal,
- hsa_signal_value_t expected,
- hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_cas_scacq_screl.
- *
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acq_rel(
- hsa_signal_t signal,
- hsa_signal_value_t expected,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_scacquire(
- hsa_signal_t signal,
- hsa_signal_value_t expected,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_cas_scacquire.
- *
- * @copydoc hsa_signal_cas_scacquire
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acquire(
- hsa_signal_t signal,
- hsa_signal_value_t expected,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t expected,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t expected,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_cas_screlease.
- *
- * @copydoc hsa_signal_cas_screlease
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_release(
- hsa_signal_t signal,
- hsa_signal_value_t expected,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically increment the value of a signal by a given amount.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to add to the value of the signal.
- *
- */
-void HSA_API hsa_signal_add_scacq_screl(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_add_scacq_screl.
- *
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_add_acq_rel(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API hsa_signal_add_scacquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_add_scacquire.
- *
- * @copydoc hsa_signal_add_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_add_acquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API hsa_signal_add_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API hsa_signal_add_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_add_screlease.
- *
- * @copydoc hsa_signal_add_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_add_release(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically decrement the value of a signal by a given amount.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to subtract from the value of the signal.
- *
- */
-void HSA_API hsa_signal_subtract_scacq_screl(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_subtract_scacq_screl.
- *
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_subtract_acq_rel(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API hsa_signal_subtract_scacquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_subtract_scacquire.
- *
- * @copydoc hsa_signal_subtract_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_subtract_acquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API hsa_signal_subtract_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API hsa_signal_subtract_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_subtract_screlease.
- *
- * @copydoc hsa_signal_subtract_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_subtract_release(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically perform a bitwise AND operation between the value of a
- * signal and a given value.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to AND with the value of the signal.
- *
- */
-void HSA_API hsa_signal_and_scacq_screl(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_and_scacq_screl.
- *
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_and_acq_rel(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API hsa_signal_and_scacquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_and_scacquire.
- *
- * @copydoc hsa_signal_and_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_and_acquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API hsa_signal_and_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API hsa_signal_and_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_and_screlease.
- *
- * @copydoc hsa_signal_and_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_and_release(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically perform a bitwise OR operation between the value of a
- * signal and a given value.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to OR with the value of the signal.
- */
-void HSA_API hsa_signal_or_scacq_screl(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_or_scacq_screl.
- *
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_or_acq_rel(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API hsa_signal_or_scacquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_or_scacquire.
- *
- * @copydoc hsa_signal_or_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_or_acquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API hsa_signal_or_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API hsa_signal_or_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_or_screlease.
- *
- * @copydoc hsa_signal_or_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_or_release(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Atomically perform a bitwise XOR operation between the value of a
- * signal and a given value.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to XOR with the value of the signal.
- *
- */
-void HSA_API hsa_signal_xor_scacq_screl(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_xor_scacq_screl.
- *
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_xor_acq_rel(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API hsa_signal_xor_scacquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_xor_scacquire.
- *
- * @copydoc hsa_signal_xor_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_xor_acquire(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API hsa_signal_xor_relaxed(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API hsa_signal_xor_screlease(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_xor_screlease.
- *
- * @copydoc hsa_signal_xor_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_xor_release(
- hsa_signal_t signal,
- hsa_signal_value_t value);
-
-/**
- * @brief Wait condition operator.
- */
-typedef enum {
- /**
- * The two operands are equal.
- */
- HSA_SIGNAL_CONDITION_EQ = 0,
- /**
- * The two operands are not equal.
- */
- HSA_SIGNAL_CONDITION_NE = 1,
- /**
- * The first operand is less than the second operand.
- */
- HSA_SIGNAL_CONDITION_LT = 2,
- /**
- * The first operand is greater than or equal to the second operand.
- */
- HSA_SIGNAL_CONDITION_GTE = 3
-} hsa_signal_condition_t;
-
-/**
- * @brief State of the application thread during a signal wait.
- */
-typedef enum {
- /**
- * The application thread may be rescheduled while waiting on the signal.
- */
- HSA_WAIT_STATE_BLOCKED = 0,
- /**
- * The application thread stays active while waiting on a signal.
- */
- HSA_WAIT_STATE_ACTIVE = 1
-} hsa_wait_state_t;
-
-
-/**
- * @brief Wait until a signal value satisfies a specified condition, or a
- * certain amount of time has elapsed.
- *
- * @details A wait operation can spuriously resume at any time sooner than the
- * timeout (for example, due to system or other external factors) even when the
- * condition has not been met.
- *
- * The function is guaranteed to return if the signal value satisfies the
- * condition at some point in time during the wait, but the value returned to
- * the application might not satisfy the condition. The application must ensure
- * that signals are used in such way that wait wakeup conditions are not
- * invalidated before dependent threads have woken up.
- *
- * When the wait operation internally loads the value of the passed signal, it
- * uses the memory order indicated in the function name.
- *
- * @param[in] signal Signal.
- *
- * @param[in] condition Condition used to compare the signal value with @p
- * compare_value.
- *
- * @param[in] compare_value Value to compare with.
- *
- * @param[in] timeout_hint Maximum duration of the wait. Specified in the same
- * unit as the system timestamp. The operation might block for a shorter or
- * longer time even if the condition is not met. A value of UINT64_MAX indicates
- * no maximum.
- *
- * @param[in] wait_state_hint Hint used by the application to indicate the
- * preferred waiting state. The actual waiting state is ultimately decided by
- * HSA runtime and may not match the provided hint. A value of
- * ::HSA_WAIT_STATE_ACTIVE may improve the latency of response to a signal
- * update by avoiding rescheduling overhead.
- *
- * @return Observed value of the signal, which might not satisfy the specified
- * condition.
- *
-*/
-hsa_signal_value_t HSA_API hsa_signal_wait_scacquire(
- hsa_signal_t signal,
- hsa_signal_condition_t condition,
- hsa_signal_value_t compare_value,
- uint64_t timeout_hint,
- hsa_wait_state_t wait_state_hint);
-
-/**
- * @copydoc hsa_signal_wait_scacquire
- */
-hsa_signal_value_t HSA_API hsa_signal_wait_relaxed(
- hsa_signal_t signal,
- hsa_signal_condition_t condition,
- hsa_signal_value_t compare_value,
- uint64_t timeout_hint,
- hsa_wait_state_t wait_state_hint);
-
-/**
- * @deprecated Renamed as ::hsa_signal_wait_scacquire.
- *
- * @copydoc hsa_signal_wait_scacquire
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_wait_acquire(
- hsa_signal_t signal,
- hsa_signal_condition_t condition,
- hsa_signal_value_t compare_value,
- uint64_t timeout_hint,
- hsa_wait_state_t wait_state_hint);
-
-/**
- * @brief Group of signals.
- */
-typedef struct hsa_signal_group_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_signal_group_t;
-
-/**
- * @brief Create a signal group.
- *
- * @param[in] num_signals Number of elements in @p signals. Must not be 0.
- *
- * @param[in] signals List of signals in the group. The list must not contain
- * any repeated elements. Must not be NULL.
- *
- * @param[in] num_consumers Number of elements in @p consumers. Must not be 0.
- *
- * @param[in] consumers List of agents that might consume (wait on) the signal
- * group. The list must not contain repeated elements, and must be a subset of
- * the set of agents that are allowed to wait on all the signals in the
- * group. If an agent not listed in @p consumers waits on the returned group,
- * the behavior is undefined. The memory associated with @p consumers can be
- * reused or freed after the function returns. Must not be NULL.
- *
- * @param[out] signal_group Pointer to newly created signal group. Must not be
- * NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_signals is 0, @p signals
- * is NULL, @p num_consumers is 0, @p consumers is NULL, or @p signal_group is
- * NULL.
- */
-hsa_status_t HSA_API hsa_signal_group_create(
- uint32_t num_signals,
- const hsa_signal_t *signals,
- uint32_t num_consumers,
- const hsa_agent_t *consumers,
- hsa_signal_group_t *signal_group);
-
-/**
- * @brief Destroy a signal group previous created by ::hsa_signal_group_create.
- *
- * @param[in] signal_group Signal group.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid.
- */
-hsa_status_t HSA_API hsa_signal_group_destroy(
- hsa_signal_group_t signal_group);
-
-/**
- * @brief Wait until the value of at least one of the signals in a signal group
- * satisfies its associated condition.
- *
- * @details The function is guaranteed to return if the value of at least one of
- * the signals in the group satisfies its associated condition at some point in
- * time during the wait, but the signal value returned to the application may no
- * longer satisfy the condition. The application must ensure that signals in the
- * group are used in such way that wait wakeup conditions are not invalidated
- * before dependent threads have woken up.
- *
- * When this operation internally loads the value of the passed signal, it uses
- * the memory order indicated in the function name.
- *
- * @param[in] signal_group Signal group.
- *
- * @param[in] conditions List of conditions. Each condition, and the value at
- * the same index in @p compare_values, is used to compare the value of the
- * signal at that index in @p signal_group (the signal passed by the application
- * to ::hsa_signal_group_create at that particular index). The size of @p
- * conditions must not be smaller than the number of signals in @p signal_group;
- * any extra elements are ignored. Must not be NULL.
- *
- * @param[in] compare_values List of comparison values. The size of @p
- * compare_values must not be smaller than the number of signals in @p
- * signal_group; any extra elements are ignored. Must not be NULL.
- *
- * @param[in] wait_state_hint Hint used by the application to indicate the
- * preferred waiting state. The actual waiting state is decided by the HSA runtime
- * and may not match the provided hint. A value of ::HSA_WAIT_STATE_ACTIVE may
- * improve the latency of response to a signal update by avoiding rescheduling
- * overhead.
- *
- * @param[out] signal Signal in the group that satisfied the associated
- * condition. If several signals satisfied their condition, the function can
- * return any of those signals. Must not be NULL.
- *
- * @param[out] value Observed value for @p signal, which might no longer satisfy
- * the specified condition. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p conditions is NULL, @p
- * compare_values is NULL, @p signal is NULL, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_signal_group_wait_any_scacquire(
- hsa_signal_group_t signal_group,
- const hsa_signal_condition_t *conditions,
- const hsa_signal_value_t *compare_values,
- hsa_wait_state_t wait_state_hint,
- hsa_signal_t *signal,
- hsa_signal_value_t *value);
-
-/**
- * @copydoc hsa_signal_group_wait_any_scacquire
- */
-hsa_status_t HSA_API hsa_signal_group_wait_any_relaxed(
- hsa_signal_group_t signal_group,
- const hsa_signal_condition_t *conditions,
- const hsa_signal_value_t *compare_values,
- hsa_wait_state_t wait_state_hint,
- hsa_signal_t *signal,
- hsa_signal_value_t *value);
-
-/** @} */
-
-/** \defgroup memory Memory
- * @{
- */
-
-/**
- * @brief A memory region represents a block of virtual memory with certain
- * properties. For example, the HSA runtime represents fine-grained memory in
- * the global segment using a region. A region might be associated with more
- * than one agent.
- */
-typedef struct hsa_region_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_region_t;
-
-/** @} */
-
-
-/** \defgroup queue Queues
- * @{
- */
-
-/**
- * @brief Queue type. Intended to be used for dynamic queue protocol
- * determination.
- */
-typedef enum {
- /**
- * Queue supports multiple producers. Use of multiproducer queue mechanics is
- * required.
- */
- HSA_QUEUE_TYPE_MULTI = 0,
- /**
- * Queue only supports a single producer. In some scenarios, the application
- * may want to limit the submission of AQL packets to a single agent. Queues
- * that support a single producer may be more efficient than queues supporting
- * multiple producers. Use of multiproducer queue mechanics is not supported.
- */
- HSA_QUEUE_TYPE_SINGLE = 1,
- /**
- * Queue supports multiple producers and cooperative dispatches. Cooperative
- * dispatches are able to use GWS synchronization. Queues of this type may be
- * limited in number. The runtime may return the same queue to serve multiple
- * ::hsa_queue_create calls when this type is given. Callers must inspect the
- * returned queue to discover queue size. Queues of this type are reference
- * counted and require a matching number of ::hsa_queue_destroy calls to
- * release. Use of multiproducer queue mechanics is required. See
- * ::HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES to query agent support for this
- * type.
- */
- HSA_QUEUE_TYPE_COOPERATIVE = 2
-} hsa_queue_type_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_queue_type_t constants.
- */
-typedef uint32_t hsa_queue_type32_t;
-
-/**
- * @brief Queue features.
- */
-typedef enum {
- /**
- * Queue supports kernel dispatch packets.
- */
- HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1,
-
- /**
- * Queue supports agent dispatch packets.
- */
- HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2
-} hsa_queue_feature_t;
-
-/**
- * @brief User mode queue.
- *
- * @details The queue structure is read-only and allocated by the HSA runtime,
- * but agents can directly modify the contents of the buffer pointed by @a
- * base_address, or use HSA runtime APIs to access the doorbell signal.
- *
- */
-typedef struct hsa_queue_s {
- /**
- * Queue type.
- */
- hsa_queue_type32_t type;
-
- /**
- * Queue features mask. This is a bit-field of ::hsa_queue_feature_t
- * values. Applications should ignore any unknown set bits.
- */
- uint32_t features;
-
-#ifdef HSA_LARGE_MODEL
- void* base_address;
-#elif defined HSA_LITTLE_ENDIAN
- /**
- * Starting address of the HSA runtime-allocated buffer used to store the AQL
- * packets. Must be aligned to the size of an AQL packet.
- */
- void* base_address;
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved0;
-#else
- uint32_t reserved0;
- void* base_address;
-#endif
-
- /**
- * Signal object used by the application to indicate the ID of a packet that
- * is ready to be processed. The HSA runtime manages the doorbell signal. If
- * the application tries to replace or destroy this signal, the behavior is
- * undefined.
- *
- * If @a type is ::HSA_QUEUE_TYPE_SINGLE, the doorbell signal value must be
- * updated in a monotonically increasing fashion. If @a type is
- * ::HSA_QUEUE_TYPE_MULTI, the doorbell signal value can be updated with any
- * value.
- */
- hsa_signal_t doorbell_signal;
-
- /**
- * Maximum number of packets the queue can hold. Must be a power of 2.
- */
- uint32_t size;
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved1;
- /**
- * Queue identifier, which is unique over the lifetime of the application.
- */
- uint64_t id;
-
-} hsa_queue_t;
-
-/**
- * @brief Create a user mode queue.
- *
- * @details The HSA runtime creates the queue structure, the underlying packet
- * buffer, the completion signal, and the write and read indexes. The initial
- * value of the write and read indexes is 0. The type of every packet in the
- * buffer is initialized to ::HSA_PACKET_TYPE_INVALID.
- *
- * The application should only rely on the error code returned to determine if
- * the queue is valid.
- *
- * @param[in] agent Agent where to create the queue.
- *
- * @param[in] size Number of packets the queue is expected to
- * hold. Must be a power of 2 between 1 and the value of
- * ::HSA_AGENT_INFO_QUEUE_MAX_SIZE in @p agent. The size of the newly
- * created queue is the maximum of @p size and the value of
- * ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent.
- *
- * @param[in] type Type of the queue, a bitwise OR of hsa_queue_type_t values.
- * If the value of ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE,
- * then @p type must also be ::HSA_QUEUE_TYPE_SINGLE.
- *
- * @param[in] callback Callback invoked by the HSA runtime for every
- * asynchronous event related to the newly created queue. May be NULL. The HSA
- * runtime passes three arguments to the callback: a code identifying the event
- * that triggered the invocation, a pointer to the queue where the event
- * originated, and the application data.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @param[in] private_segment_size Hint indicating the maximum
- * expected private segment usage per work-item, in bytes. There may
- * be performance degradation if the application places a kernel
- * dispatch packet in the queue and the corresponding private segment
- * usage exceeds @p private_segment_size. If the application does not
- * want to specify any particular value for this argument, @p
- * private_segment_size must be UINT32_MAX. If the queue does not
- * support kernel dispatch packets, this argument is ignored.
- *
- * @param[in] group_segment_size Hint indicating the maximum expected
- * group segment usage per work-group, in bytes. There may be
- * performance degradation if the application places a kernel dispatch
- * packet in the queue and the corresponding group segment usage
- * exceeds @p group_segment_size. If the application does not want to
- * specify any particular value for this argument, @p
- * group_segment_size must be UINT32_MAX. If the queue does not
- * support kernel dispatch packets, this argument is ignored.
- *
- * @param[out] queue Memory location where the HSA runtime stores a pointer to
- * the newly created queue.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE_CREATION @p agent does not
- * support queues of the given type.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two,
- * @p size is 0, @p type is an invalid queue type, or @p queue is NULL.
- *
- */
-hsa_status_t HSA_API hsa_queue_create(
- hsa_agent_t agent,
- uint32_t size,
- hsa_queue_type32_t type,
- void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
- void *data,
- uint32_t private_segment_size,
- uint32_t group_segment_size,
- hsa_queue_t **queue);
-
-/**
- * @brief Create a queue for which the application or a kernel is responsible
- * for processing the AQL packets.
- *
- * @details The application can use this function to create queues where AQL
- * packets are not parsed by the packet processor associated with an agent,
- * but rather by a unit of execution running on that agent (for example, a
- * thread in the host application).
- *
- * The application is responsible for ensuring that all the producers and
- * consumers of the resulting queue can access the provided doorbell signal
- * and memory region. The application is also responsible for ensuring that the
- * unit of execution processing the queue packets supports the indicated
- * features (AQL packet types).
- *
- * When the queue is created, the HSA runtime allocates the packet buffer using
- * @p region, and the write and read indexes. The initial value of the write and
- * read indexes is 0, and the type of every packet in the buffer is initialized
- * to ::HSA_PACKET_TYPE_INVALID. The value of the @e size, @e type, @e features,
- * and @e doorbell_signal fields in the returned queue match the values passed
- * by the application.
- *
- * @param[in] region Memory region that the HSA runtime should use to allocate
- * the AQL packet buffer and any other queue metadata.
- *
- * @param[in] size Number of packets the queue is expected to hold. Must be a
- * power of 2 greater than 0.
- *
- * @param[in] type Queue type.
- *
- * @param[in] features Supported queue features. This is a bit-field of
- * ::hsa_queue_feature_t values.
- *
- * @param[in] doorbell_signal Doorbell signal that the HSA runtime must
- * associate with the returned queue. The signal handle must not be 0.
- *
- * @param[out] queue Memory location where the HSA runtime stores a pointer to
- * the newly created queue. The application should not rely on the value
- * returned for this argument but only in the status code to determine if the
- * queue is valid. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, @p
- * size is 0, @p type is an invalid queue type, the doorbell signal handle is
- * 0, or @p queue is NULL.
- *
- */
-hsa_status_t HSA_API hsa_soft_queue_create(
- hsa_region_t region,
- uint32_t size,
- hsa_queue_type32_t type,
- uint32_t features,
- hsa_signal_t doorbell_signal,
- hsa_queue_t **queue);
-
-/**
- * @brief Destroy a user mode queue.
- *
- * @details When a queue is destroyed, the state of the AQL packets that have
- * not been yet fully processed (their completion phase has not finished)
- * becomes undefined. It is the responsibility of the application to ensure that
- * all pending queue operations are finished if their results are required.
- *
- * The resources allocated by the HSA runtime during queue creation (queue
- * structure, ring buffer, doorbell signal) are released. The queue should not
- * be accessed after being destroyed.
- *
- * @param[in] queue Pointer to a queue created using ::hsa_queue_create.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
- */
-hsa_status_t HSA_API hsa_queue_destroy(
- hsa_queue_t *queue);
-
-/**
- * @brief Inactivate a queue.
- *
- * @details Inactivating the queue aborts any pending executions and prevent any
- * new packets from being processed. Any more packets written to the queue once
- * it is inactivated will be ignored by the packet processor.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
- */
-hsa_status_t HSA_API hsa_queue_inactivate(
- hsa_queue_t *queue);
-
-/**
- * @deprecated Renamed as ::hsa_queue_load_read_index_scacquire.
- *
- * @copydoc hsa_queue_load_read_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_read_index_acquire(
- const hsa_queue_t *queue);
-
-/**
- * @brief Atomically load the read index of a queue.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @return Read index of the queue pointed by @p queue.
- */
-uint64_t HSA_API hsa_queue_load_read_index_scacquire(
- const hsa_queue_t *queue);
-
-/**
- * @copydoc hsa_queue_load_read_index_scacquire
- */
-uint64_t HSA_API hsa_queue_load_read_index_relaxed(
- const hsa_queue_t *queue);
-
-/**
- * @deprecated Renamed as ::hsa_queue_load_write_index_scacquire.
- *
- * @copydoc hsa_queue_load_write_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_write_index_acquire(
- const hsa_queue_t *queue);
-
-/**
- * @brief Atomically load the write index of a queue.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @return Write index of the queue pointed by @p queue.
- */
-uint64_t HSA_API hsa_queue_load_write_index_scacquire(
- const hsa_queue_t *queue);
-
-/**
- * @copydoc hsa_queue_load_write_index_scacquire
- */
-uint64_t HSA_API hsa_queue_load_write_index_relaxed(
- const hsa_queue_t *queue);
-
-/**
- * @brief Atomically set the write index of a queue.
- *
- * @details It is recommended that the application uses this function to update
- * the write index when there is a single agent submitting work to the queue
- * (the queue type is ::HSA_QUEUE_TYPE_SINGLE).
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] value Value to assign to the write index.
- *
- */
-void HSA_API hsa_queue_store_write_index_relaxed(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_store_write_index_screlease.
- *
- * @copydoc hsa_queue_store_write_index_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_queue_store_write_index_release(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_store_write_index_relaxed
- */
-void HSA_API hsa_queue_store_write_index_screlease(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_cas_write_index_scacq_screl.
- *
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acq_rel(
- const hsa_queue_t *queue,
- uint64_t expected,
- uint64_t value);
-
-/**
- * @brief Atomically set the write index of a queue if the observed value is
- * equal to the expected value. The application can inspect the returned value
- * to determine if the replacement was done.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] expected Expected value.
- *
- * @param[in] value Value to assign to the write index if @p expected matches
- * the observed write index. Must be greater than @p expected.
- *
- * @return Previous value of the write index.
- */
-uint64_t HSA_API hsa_queue_cas_write_index_scacq_screl(
- const hsa_queue_t *queue,
- uint64_t expected,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_cas_write_index_scacquire.
- *
- * @copydoc hsa_queue_cas_write_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acquire(
- const hsa_queue_t *queue,
- uint64_t expected,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_cas_write_index_scacquire(
- const hsa_queue_t *queue,
- uint64_t expected,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_cas_write_index_relaxed(
- const hsa_queue_t *queue,
- uint64_t expected,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_cas_write_index_screlease.
- *
- * @copydoc hsa_queue_cas_write_index_screlease
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_release(
- const hsa_queue_t *queue,
- uint64_t expected,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_cas_write_index_screlease(
- const hsa_queue_t *queue,
- uint64_t expected,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_add_write_index_scacq_screl.
- *
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acq_rel(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @brief Atomically increment the write index of a queue by an offset.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] value Value to add to the write index.
- *
- * @return Previous value of the write index.
- */
-uint64_t HSA_API hsa_queue_add_write_index_scacq_screl(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_add_write_index_scacquire.
- *
- * @copydoc hsa_queue_add_write_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acquire(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_add_write_index_scacquire(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_add_write_index_relaxed(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_add_write_index_screlease.
- *
- * @copydoc hsa_queue_add_write_index_screlease
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_release(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_add_write_index_screlease(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @brief Atomically set the read index of a queue.
- *
- * @details Modifications of the read index are not allowed and result in
- * undefined behavior if the queue is associated with an agent for which
- * only the corresponding packet processor is permitted to update the read
- * index.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] value Value to assign to the read index.
- *
- */
-void HSA_API hsa_queue_store_read_index_relaxed(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_store_read_index_screlease.
- *
- * @copydoc hsa_queue_store_read_index_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_queue_store_read_index_release(
- const hsa_queue_t *queue,
- uint64_t value);
-
-/**
- * @copydoc hsa_queue_store_read_index_relaxed
- */
-void HSA_API hsa_queue_store_read_index_screlease(
- const hsa_queue_t *queue,
- uint64_t value);
-/** @} */
-
-
-/** \defgroup aql Architected Queuing Language
- * @{
- */
-
-/**
- * @brief Packet type.
- */
-typedef enum {
- /**
- * Vendor-specific packet.
- */
- HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0,
- /**
- * The packet has been processed in the past, but has not been reassigned to
- * the packet processor. A packet processor must not process a packet of this
- * type. All queues support this packet type.
- */
- HSA_PACKET_TYPE_INVALID = 1,
- /**
- * Packet used by agents for dispatching jobs to kernel agents. Not all
- * queues support packets of this type (see ::hsa_queue_feature_t).
- */
- HSA_PACKET_TYPE_KERNEL_DISPATCH = 2,
- /**
- * Packet used by agents to delay processing of subsequent packets, and to
- * express complex dependencies between multiple packets. All queues support
- * this packet type.
- */
- HSA_PACKET_TYPE_BARRIER_AND = 3,
- /**
- * Packet used by agents for dispatching jobs to agents. Not all
- * queues support packets of this type (see ::hsa_queue_feature_t).
- */
- HSA_PACKET_TYPE_AGENT_DISPATCH = 4,
- /**
- * Packet used by agents to delay processing of subsequent packets, and to
- * express complex dependencies between multiple packets. All queues support
- * this packet type.
- */
- HSA_PACKET_TYPE_BARRIER_OR = 5
-} hsa_packet_type_t;
-
-/**
- * @brief Scope of the memory fence operation associated with a packet.
- */
-typedef enum {
- /**
- * No scope (no fence is applied). The packet relies on external fences to
- * ensure visibility of memory updates.
- */
- HSA_FENCE_SCOPE_NONE = 0,
- /**
- * The fence is applied with agent scope for the global segment.
- */
- HSA_FENCE_SCOPE_AGENT = 1,
- /**
- * The fence is applied across both agent and system scope for the global
- * segment.
- */
- HSA_FENCE_SCOPE_SYSTEM = 2
-} hsa_fence_scope_t;
-
-/**
- * @brief Sub-fields of the @a header field that is present in any AQL
- * packet. The offset (with respect to the address of @a header) of a sub-field
- * is identical to its enumeration constant. The width of each sub-field is
- * determined by the corresponding value in ::hsa_packet_header_width_t. The
- * offset and the width are expressed in bits.
- */
- typedef enum {
- /**
- * Packet type. The value of this sub-field must be one of
- * ::hsa_packet_type_t. If the type is ::HSA_PACKET_TYPE_VENDOR_SPECIFIC, the
- * packet layout is vendor-specific.
- */
- HSA_PACKET_HEADER_TYPE = 0,
- /**
- * Barrier bit. If the barrier bit is set, the processing of the current
- * packet only launches when all preceding packets (within the same queue) are
- * complete.
- */
- HSA_PACKET_HEADER_BARRIER = 8,
- /**
- * Acquire fence scope. The value of this sub-field determines the scope and
- * type of the memory fence operation applied before the packet enters the
- * active phase. An acquire fence ensures that any subsequent global segment
- * or image loads by any unit of execution that belongs to a dispatch that has
- * not yet entered the active phase on any queue of the same kernel agent,
- * sees any data previously released at the scopes specified by the acquire
- * fence. The value of this sub-field must be one of ::hsa_fence_scope_t.
- */
- HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE = 9,
- /**
- * @deprecated Renamed as ::HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE.
- */
- HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9,
- /**
- * Release fence scope, The value of this sub-field determines the scope and
- * type of the memory fence operation applied after kernel completion but
- * before the packet is completed. A release fence makes any global segment or
- * image data that was stored by any unit of execution that belonged to a
- * dispatch that has completed the active phase on any queue of the same
- * kernel agent visible in all the scopes specified by the release fence. The
- * value of this sub-field must be one of ::hsa_fence_scope_t.
- */
- HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE = 11,
- /**
- * @deprecated Renamed as ::HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE.
- */
- HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11
- } hsa_packet_header_t;
-
-/**
- * @brief Width (in bits) of the sub-fields in ::hsa_packet_header_t.
- */
- typedef enum {
- HSA_PACKET_HEADER_WIDTH_TYPE = 8,
- HSA_PACKET_HEADER_WIDTH_BARRIER = 1,
- HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE = 2,
- /**
- * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE.
- */
- HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = 2,
- HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE = 2,
- /**
- * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE.
- */
- HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = 2
- } hsa_packet_header_width_t;
-
-/**
- * @brief Sub-fields of the kernel dispatch packet @a setup field. The offset
- * (with respect to the address of @a setup) of a sub-field is identical to its
- * enumeration constant. The width of each sub-field is determined by the
- * corresponding value in ::hsa_kernel_dispatch_packet_setup_width_t. The
- * offset and the width are expressed in bits.
- */
- typedef enum {
- /**
- * Number of dimensions of the grid. Valid values are 1, 2, or 3.
- *
- */
- HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0
- } hsa_kernel_dispatch_packet_setup_t;
-
-/**
- * @brief Width (in bits) of the sub-fields in
- * ::hsa_kernel_dispatch_packet_setup_t.
- */
- typedef enum {
- HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2
- } hsa_kernel_dispatch_packet_setup_width_t;
-
-/**
- * @brief AQL kernel dispatch packet
- */
-typedef struct hsa_kernel_dispatch_packet_s {
- /**
- * Packet header. Used to configure multiple packet parameters such as the
- * packet type. The parameters are described by ::hsa_packet_header_t.
- */
- uint16_t header;
-
- /**
- * Dispatch setup parameters. Used to configure kernel dispatch parameters
- * such as the number of dimensions in the grid. The parameters are described
- * by ::hsa_kernel_dispatch_packet_setup_t.
- */
- uint16_t setup;
-
- /**
- * X dimension of work-group, in work-items. Must be greater than 0.
- */
- uint16_t workgroup_size_x;
-
- /**
- * Y dimension of work-group, in work-items. Must be greater than
- * 0. If the grid has 1 dimension, the only valid value is 1.
- */
- uint16_t workgroup_size_y;
-
- /**
- * Z dimension of work-group, in work-items. Must be greater than
- * 0. If the grid has 1 or 2 dimensions, the only valid value is 1.
- */
- uint16_t workgroup_size_z;
-
- /**
- * Reserved. Must be 0.
- */
- uint16_t reserved0;
-
- /**
- * X dimension of grid, in work-items. Must be greater than 0. Must
- * not be smaller than @a workgroup_size_x.
- */
- uint32_t grid_size_x;
-
- /**
- * Y dimension of grid, in work-items. Must be greater than 0. If the grid has
- * 1 dimension, the only valid value is 1. Must not be smaller than @a
- * workgroup_size_y.
- */
- uint32_t grid_size_y;
-
- /**
- * Z dimension of grid, in work-items. Must be greater than 0. If the grid has
- * 1 or 2 dimensions, the only valid value is 1. Must not be smaller than @a
- * workgroup_size_z.
- */
- uint32_t grid_size_z;
-
- /**
- * Size in bytes of private memory allocation request (per work-item).
- */
- uint32_t private_segment_size;
-
- /**
- * Size in bytes of group memory allocation request (per work-group). Must not
- * be less than the sum of the group memory used by the kernel (and the
- * functions it calls directly or indirectly) and the dynamically allocated
- * group segment variables.
- */
- uint32_t group_segment_size;
-
- /**
- * Opaque handle to a code object that includes an implementation-defined
- * executable code for the kernel.
- */
- uint64_t kernel_object;
-
-#ifdef HSA_LARGE_MODEL
- void* kernarg_address;
-#elif defined HSA_LITTLE_ENDIAN
- /**
- * Pointer to a buffer containing the kernel arguments. May be NULL.
- *
- * The buffer must be allocated using ::hsa_memory_allocate, and must not be
- * modified once the kernel dispatch packet is enqueued until the dispatch has
- * completed execution.
- */
- void* kernarg_address;
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved1;
-#else
- uint32_t reserved1;
- void* kernarg_address;
-#endif
-
- /**
- * Reserved. Must be 0.
- */
- uint64_t reserved2;
-
- /**
- * Signal used to indicate completion of the job. The application can use the
- * special signal handle 0 to indicate that no signal is used.
- */
- hsa_signal_t completion_signal;
-
-} hsa_kernel_dispatch_packet_t;
-
-/**
- * @brief Agent dispatch packet.
- */
-typedef struct hsa_agent_dispatch_packet_s {
- /**
- * Packet header. Used to configure multiple packet parameters such as the
- * packet type. The parameters are described by ::hsa_packet_header_t.
- */
- uint16_t header;
-
- /**
- * Application-defined function to be performed by the destination agent.
- */
- uint16_t type;
-
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved0;
-
-#ifdef HSA_LARGE_MODEL
- void* return_address;
-#elif defined HSA_LITTLE_ENDIAN
- /**
- * Address where to store the function return values, if any.
- */
- void* return_address;
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved1;
-#else
- uint32_t reserved1;
- void* return_address;
-#endif
-
- /**
- * Function arguments.
- */
- uint64_t arg[4];
-
- /**
- * Reserved. Must be 0.
- */
- uint64_t reserved2;
-
- /**
- * Signal used to indicate completion of the job. The application can use the
- * special signal handle 0 to indicate that no signal is used.
- */
- hsa_signal_t completion_signal;
-
-} hsa_agent_dispatch_packet_t;
-
-/**
- * @brief Barrier-AND packet.
- */
-typedef struct hsa_barrier_and_packet_s {
- /**
- * Packet header. Used to configure multiple packet parameters such as the
- * packet type. The parameters are described by ::hsa_packet_header_t.
- */
- uint16_t header;
-
- /**
- * Reserved. Must be 0.
- */
- uint16_t reserved0;
-
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved1;
-
- /**
- * Array of dependent signal objects. Signals with a handle value of 0 are
- * allowed and are interpreted by the packet processor as satisfied
- * dependencies.
- */
- hsa_signal_t dep_signal[5];
-
- /**
- * Reserved. Must be 0.
- */
- uint64_t reserved2;
-
- /**
- * Signal used to indicate completion of the job. The application can use the
- * special signal handle 0 to indicate that no signal is used.
- */
- hsa_signal_t completion_signal;
-
-} hsa_barrier_and_packet_t;
-
-/**
- * @brief Barrier-OR packet.
- */
-typedef struct hsa_barrier_or_packet_s {
- /**
- * Packet header. Used to configure multiple packet parameters such as the
- * packet type. The parameters are described by ::hsa_packet_header_t.
- */
- uint16_t header;
-
- /**
- * Reserved. Must be 0.
- */
- uint16_t reserved0;
-
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved1;
-
- /**
- * Array of dependent signal objects. Signals with a handle value of 0 are
- * allowed and are interpreted by the packet processor as dependencies not
- * satisfied.
- */
- hsa_signal_t dep_signal[5];
-
- /**
- * Reserved. Must be 0.
- */
- uint64_t reserved2;
-
- /**
- * Signal used to indicate completion of the job. The application can use the
- * special signal handle 0 to indicate that no signal is used.
- */
- hsa_signal_t completion_signal;
-
-} hsa_barrier_or_packet_t;
-
-/** @} */
-
-/** \addtogroup memory Memory
- * @{
- */
-
-/**
- * @brief Memory segments associated with a region.
- */
-typedef enum {
- /**
- * Global segment. Used to hold data that is shared by all agents.
- */
- HSA_REGION_SEGMENT_GLOBAL = 0,
- /**
- * Read-only segment. Used to hold data that remains constant during the
- * execution of a kernel.
- */
- HSA_REGION_SEGMENT_READONLY = 1,
- /**
- * Private segment. Used to hold data that is local to a single work-item.
- */
- HSA_REGION_SEGMENT_PRIVATE = 2,
- /**
- * Group segment. Used to hold data that is shared by the work-items of a
- * work-group.
- */
- HSA_REGION_SEGMENT_GROUP = 3,
- /**
- * Kernarg segment. Used to store kernel arguments.
- */
- HSA_REGION_SEGMENT_KERNARG = 4
-} hsa_region_segment_t;
-
-/**
- * @brief Global region flags.
- */
-typedef enum {
- /**
- * The application can use memory in the region to store kernel arguments, and
- * provide the values for the kernarg segment of a kernel dispatch. If this
- * flag is set, then ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set.
- */
- HSA_REGION_GLOBAL_FLAG_KERNARG = 1,
- /**
- * Updates to memory in this region are immediately visible to all the
- * agents under the terms of the HSA memory model. If this
- * flag is set, then ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set.
- */
- HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2,
- /**
- * Updates to memory in this region can be performed by a single agent at
- * a time. If a different agent in the system is allowed to access the
- * region, the application must explicitely invoke ::hsa_memory_assign_agent
- * in order to transfer ownership to that agent for a particular buffer.
- */
- HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4
-} hsa_region_global_flag_t;
-
-/**
- * @brief Attributes of a memory region.
- */
-typedef enum {
- /**
- * Segment where memory in the region can be used. The type of this
- * attribute is ::hsa_region_segment_t.
- */
- HSA_REGION_INFO_SEGMENT = 0,
- /**
- * Flag mask. The value of this attribute is undefined if the value of
- * ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of
- * this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t
- * values.
- */
- HSA_REGION_INFO_GLOBAL_FLAGS = 1,
- /**
- * Size of this region, in bytes. The type of this attribute is size_t.
- */
- HSA_REGION_INFO_SIZE = 2,
- /**
- * Maximum allocation size in this region, in bytes. Must not exceed the value
- * of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t.
- *
- * If the region is in the global or readonly segments, this is the maximum
- * size that the application can pass to ::hsa_memory_allocate.
- *
- * If the region is in the group segment, this is the maximum size (per
- * work-group) that can be requested for a given kernel dispatch. If the
- * region is in the private segment, this is the maximum size (per work-item)
- * that can be requested for a specific kernel dispatch, and must be at least
- * 256 bytes.
- */
- HSA_REGION_INFO_ALLOC_MAX_SIZE = 4,
- /**
- * Maximum size (per work-group) of private memory that can be requested for a
- * specific kernel dispatch. Must be at least 65536 bytes. The type of this
- * attribute is uint32_t. The value of this attribute is undefined if the
- * region is not in the private segment.
- */
- HSA_REGION_INFO_ALLOC_MAX_PRIVATE_WORKGROUP_SIZE = 8,
- /**
- * Indicates whether memory in this region can be allocated using
- * ::hsa_memory_allocate. The type of this attribute is bool.
- *
- * The value of this flag is always false for regions in the group and private
- * segments.
- */
- HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5,
- /**
- * Allocation granularity of buffers allocated by ::hsa_memory_allocate in
- * this region. The size of a buffer allocated in this region is a multiple of
- * the value of this attribute. The value of this attribute is only defined if
- * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type
- * of this attribute is size_t.
- */
- HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6,
- /**
- * Alignment of buffers allocated by ::hsa_memory_allocate in this region. The
- * value of this attribute is only defined if
- * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must be
- * a power of 2. The type of this attribute is size_t.
- */
- HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7
-} hsa_region_info_t;
-
-/**
- * @brief Get the current value of an attribute of a region.
- *
- * @param[in] region A valid region.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to a application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * region attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_region_get_info(
- hsa_region_t region,
- hsa_region_info_t attribute,
- void* value);
-
-/**
- * @brief Iterate over the memory regions associated with a given agent, and
- * invoke an application-defined callback on every iteration.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked once per region that is
- * accessible from the agent. The HSA runtime passes two arguments to the
- * callback, the region and the application data. If @p callback returns a
- * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
- * traversal stops and ::hsa_agent_iterate_regions returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_agent_iterate_regions(
- hsa_agent_t agent,
- hsa_status_t (*callback)(hsa_region_t region, void* data),
- void* data);
-
-/**
- * @brief Allocate a block of memory in a given region.
- *
- * @param[in] region Region where to allocate memory from. The region must have
- * the ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED flag set.
- *
- * @param[in] size Allocation size, in bytes. Must not be zero. This value is
- * rounded up to the nearest multiple of ::HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE
- * in @p region.
- *
- * @param[out] ptr Pointer to the location where to store the base address of
- * the allocated block. The returned base address is aligned to the value of
- * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT in @p region. If the allocation
- * fails, the returned value is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
- * allocate memory in @p region, or @p size is greater than the value of
- * HSA_REGION_INFO_ALLOC_MAX_SIZE in @p region.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0.
- */
-hsa_status_t HSA_API hsa_memory_allocate(hsa_region_t region,
- size_t size,
- void** ptr);
-
-/**
- * @brief Deallocate a block of memory previously allocated using
- * ::hsa_memory_allocate.
- *
- * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
- * previously returned by ::hsa_memory_allocate, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- */
-hsa_status_t HSA_API hsa_memory_free(void* ptr);
-
-/**
- * @brief Copy a block of memory from the location pointed to by @p src to the
- * memory block pointed to by @p dst.
- *
- * @param[out] dst Buffer where the content is to be copied. If @p dst is in
- * coarse-grained memory, the copied data is only visible to the agent currently
- * assigned (::hsa_memory_assign_agent) to @p dst.
- *
- * @param[in] src A valid pointer to the source of data to be copied. The source
- * buffer must not overlap with the destination buffer. If the source buffer is
- * in coarse-grained memory then it must be assigned to an agent, from which the
- * data will be retrieved.
- *
- * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
- * performed and the function returns success. Copying a number of bytes larger
- * than the size of the buffers pointed by @p dst or @p src results in undefined
- * behavior.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
- * pointers are NULL.
- */
-hsa_status_t HSA_API hsa_memory_copy(
- void *dst,
- const void *src,
- size_t size);
-
-/**
- * @brief Change the ownership of a global, coarse-grained buffer.
- *
- * @details The contents of a coarse-grained buffer are visible to an agent
- * only after ownership has been explicitely transferred to that agent. Once the
- * operation completes, the previous owner cannot longer access the data in the
- * buffer.
- *
- * An implementation of the HSA runtime is allowed, but not required, to change
- * the physical location of the buffer when ownership is transferred to a
- * different agent. In general the application must not assume this
- * behavior. The virtual location (address) of the passed buffer is never
- * modified.
- *
- * @param[in] ptr Base address of a global buffer. The pointer must match an
- * address previously returned by ::hsa_memory_allocate. The size of the buffer
- * affected by the ownership change is identical to the size of that previous
- * allocation. If @p ptr points to a fine-grained global buffer, no operation is
- * performed and the function returns success. If @p ptr does not point to
- * global memory, the behavior is undefined.
- *
- * @param[in] agent Agent that becomes the owner of the buffer. The
- * application is responsible for ensuring that @p agent has access to the
- * region that contains the buffer. It is allowed to change ownership to an
- * agent that is already the owner of the buffer, with the same or different
- * access permissions.
- *
- * @param[in] access Access permissions requested for the new owner.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p access is
- * not a valid access value.
- */
-hsa_status_t HSA_API hsa_memory_assign_agent(
- void *ptr,
- hsa_agent_t agent,
- hsa_access_permission_t access);
-
-/**
- *
- * @brief Register a global, fine-grained buffer.
- *
- * @details Registering a buffer serves as an indication to the HSA runtime that
- * the memory might be accessed from a kernel agent other than the
- * host. Registration is a performance hint that allows the HSA runtime
- * implementation to know which buffers will be accessed by some of the kernel
- * agents ahead of time.
- *
- * Registration is only recommended for buffers in the global segment that have
- * not been allocated using the HSA allocator (::hsa_memory_allocate), but an OS
- * allocator instead. Registering an OS-allocated buffer in the base profile is
- * equivalent to a no-op.
- *
- * Registrations should not overlap.
- *
- * @param[in] ptr A buffer in global, fine-grained memory. If a NULL pointer is
- * passed, no operation is performed. If the buffer has been allocated using
- * ::hsa_memory_allocate, or has already been registered, no operation is
- * performed.
- *
- * @param[in] size Requested registration size in bytes. A size of 0 is
- * only allowed if @p ptr is NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 but @p ptr
- * is not NULL.
- */
-hsa_status_t HSA_API hsa_memory_register(
- void *ptr,
- size_t size);
-
-/**
- *
- * @brief Deregister memory previously registered using ::hsa_memory_register.
- *
- * @details If the memory interval being deregistered does not match a previous
- * registration (start and end addresses), the behavior is undefined.
- *
- * @param[in] ptr A pointer to the base of the buffer to be deregistered. If
- * a NULL pointer is passed, no operation is performed.
- *
- * @param[in] size Size of the buffer to be deregistered.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- */
-hsa_status_t HSA_API hsa_memory_deregister(
- void *ptr,
- size_t size);
-
-/** @} */
-
-
-/** \defgroup instruction-set-architecture Instruction Set Architecture.
- * @{
- */
-
-/**
- * @brief Instruction set architecture.
- */
-typedef struct hsa_isa_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_isa_t;
-
-/**
- * @brief Retrieve a reference to an instruction set architecture handle out of
- * a symbolic name.
- *
- * @param[in] name Vendor-specific name associated with a a particular
- * instruction set architecture. @p name must start with the vendor name and a
- * colon (for example, "AMD:"). The rest of the name is vendor-specific. Must be
- * a NUL-terminated string.
- *
- * @param[out] isa Memory location where the HSA runtime stores the ISA handle
- * corresponding to the given name. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA_NAME The given name does not
- * correspond to any instruction set architecture.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p name is NULL, or @p isa is
- * NULL.
- */
-hsa_status_t HSA_API hsa_isa_from_name(
- const char *name,
- hsa_isa_t *isa);
-
-/**
- * @brief Iterate over the instruction sets supported by the given agent, and
- * invoke an application-defined callback on every iteration. The iterator is
- * deterministic: if an agent supports several instruction set architectures,
- * they are traversed in the same order in every invocation of this function.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked once per instruction set
- * architecture. The HSA runtime passes two arguments to the callback: the
- * ISA and the application data. If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * that status value is returned.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_agent_iterate_isas(
- hsa_agent_t agent,
- hsa_status_t (*callback)(hsa_isa_t isa, void *data),
- void *data);
-
-/**
- * @brief Instruction set architecture attributes.
- */
-typedef enum {
- /**
- * The length of the ISA name in bytes, not including the NUL terminator. The
- * type of this attribute is uint32_t.
- */
- HSA_ISA_INFO_NAME_LENGTH = 0,
- /**
- * Human-readable description. The type of this attribute is character array
- * with the length equal to the value of ::HSA_ISA_INFO_NAME_LENGTH attribute.
- */
- HSA_ISA_INFO_NAME = 1,
- /**
- * @deprecated
- *
- * Number of call conventions supported by the instruction set architecture.
- * Must be greater than zero. The type of this attribute is uint32_t.
- */
- HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2,
- /**
- * @deprecated
- *
- * Number of work-items in a wavefront for a given call convention. Must be a
- * power of 2 in the range [1,256]. The type of this attribute is uint32_t.
- */
- HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3,
- /**
- * @deprecated
- *
- * Number of wavefronts per compute unit for a given call convention. In
- * practice, other factors (for example, the amount of group memory used by a
- * work-group) may further limit the number of wavefronts per compute
- * unit. The type of this attribute is uint32_t.
- */
- HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4,
- /**
- * Machine models supported by the instruction set architecture. The type of
- * this attribute is a bool[2]. If the ISA supports the small machine model,
- * the element at index ::HSA_MACHINE_MODEL_SMALL is true. If the ISA supports
- * the large model, the element at index ::HSA_MACHINE_MODEL_LARGE is true.
- */
- HSA_ISA_INFO_MACHINE_MODELS = 5,
- /**
- * Profiles supported by the instruction set architecture. The type of this
- * attribute is a bool[2]. If the ISA supports the base profile, the element
- * at index ::HSA_PROFILE_BASE is true. If the ISA supports the full profile,
- * the element at index ::HSA_PROFILE_FULL is true.
- */
- HSA_ISA_INFO_PROFILES = 6,
- /**
- * Default floating-point rounding modes supported by the instruction set
- * architecture. The type of this attribute is a bool[3]. The value at a given
- * index is true if the corresponding rounding mode in
- * ::hsa_default_float_rounding_mode_t is supported. At least one default mode
- * has to be supported.
- *
- * If the default mode is supported, then
- * ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES must report that
- * both the zero and the near roundings modes are supported.
- */
- HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES = 7,
- /**
- * Default floating-point rounding modes supported by the instruction set
- * architecture in the Base profile. The type of this attribute is a
- * bool[3]. The value at a given index is true if the corresponding rounding
- * mode in ::hsa_default_float_rounding_mode_t is supported. The value at
- * index HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT must be false. At least one
- * of the values at indexes ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO or
- * HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR must be true.
- */
- HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 8,
- /**
- * Flag indicating that the f16 HSAIL operation is at least as fast as the
- * f32 operation in the instruction set architecture. The type of this
- * attribute is bool.
- */
- HSA_ISA_INFO_FAST_F16_OPERATION = 9,
- /**
- * Maximum number of work-items of each dimension of a work-group. Each
- * maximum must be greater than 0. No maximum can exceed the value of
- * ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE. The type of this attribute is
- * uint16_t[3].
- */
- HSA_ISA_INFO_WORKGROUP_MAX_DIM = 12,
- /**
- * Maximum total number of work-items in a work-group. The type
- * of this attribute is uint32_t.
- */
- HSA_ISA_INFO_WORKGROUP_MAX_SIZE = 13,
- /**
- * Maximum number of work-items of each dimension of a grid. Each maximum must
- * be greater than 0, and must not be smaller than the corresponding value in
- * ::HSA_ISA_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
- * ::HSA_ISA_INFO_GRID_MAX_SIZE. The type of this attribute is
- * ::hsa_dim3_t.
- */
- HSA_ISA_INFO_GRID_MAX_DIM = 14,
- /**
- * Maximum total number of work-items in a grid. The type of this
- * attribute is uint64_t.
- */
- HSA_ISA_INFO_GRID_MAX_SIZE = 16,
- /**
- * Maximum number of fbarriers per work-group. Must be at least 32. The
- * type of this attribute is uint32_t.
- */
- HSA_ISA_INFO_FBARRIER_MAX_SIZE = 17
-} hsa_isa_info_t;
-
-/**
- * @deprecated The concept of call convention has been deprecated. If the
- * application wants to query the value of an attribute for a given instruction
- * set architecture, use ::hsa_isa_get_info_alt instead. If the application
- * wants to query an attribute that is specific to a given combination of ISA
- * and wavefront, use ::hsa_wavefront_get_info.
- *
- * @brief Get the current value of an attribute for a given instruction set
- * architecture (ISA).
- *
- * @param[in] isa A valid instruction set architecture.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[in] index Call convention index. Used only for call convention
- * attributes, otherwise ignored. Must have a value between 0 (inclusive) and
- * the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT (not
- * inclusive) in @p isa.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_INDEX The index is out of range.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * instruction set architecture attribute, or @p value is
- * NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_get_info(
- hsa_isa_t isa,
- hsa_isa_info_t attribute,
- uint32_t index,
- void *value);
-
-/**
- * @brief Get the current value of an attribute for a given instruction set
- * architecture (ISA).
- *
- * @param[in] isa A valid instruction set architecture.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * instruction set architecture attribute, or @p value is
- * NULL.
- */
-hsa_status_t HSA_API hsa_isa_get_info_alt(
- hsa_isa_t isa,
- hsa_isa_info_t attribute,
- void *value);
-
-/**
- * @brief Retrieve the exception policy support for a given combination of
- * instruction set architecture and profile.
- *
- * @param[in] isa A valid instruction set architecture.
- *
- * @param[in] profile Profile.
- *
- * @param[out] mask Pointer to a memory location where the HSA runtime stores a
- * mask of ::hsa_exception_policy_t values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
- * profile, or @p mask is NULL.
- */
-hsa_status_t HSA_API hsa_isa_get_exception_policies(
- hsa_isa_t isa,
- hsa_profile_t profile,
- uint16_t *mask);
-
-/**
- * @brief Floating-point types.
- */
-typedef enum {
- /**
- * 16-bit floating-point type.
- */
- HSA_FP_TYPE_16 = 1,
- /**
- * 32-bit floating-point type.
- */
- HSA_FP_TYPE_32 = 2,
- /**
- * 64-bit floating-point type.
- */
- HSA_FP_TYPE_64 = 4
-} hsa_fp_type_t;
-
-/**
- * @brief Flush to zero modes.
- */
-typedef enum {
- /**
- * Flush to zero.
- */
- HSA_FLUSH_MODE_FTZ = 1,
- /**
- * Do not flush to zero.
- */
- HSA_FLUSH_MODE_NON_FTZ = 2
-} hsa_flush_mode_t;
-
-/**
- * @brief Round methods.
- */
-typedef enum {
- /**
- * Single round method.
- */
- HSA_ROUND_METHOD_SINGLE = 1,
- /**
- * Double round method.
- */
- HSA_ROUND_METHOD_DOUBLE = 2
-} hsa_round_method_t;
-
-/**
- * @brief Retrieve the round method (single or double) used to implement the
- * floating-point multiply add instruction (mad) for a given combination of
- * instruction set architecture, floating-point type, and flush to zero
- * modifier.
- *
- * @param[in] isa Instruction set architecture.
- *
- * @param[in] fp_type Floating-point type.
- *
- * @param[in] flush_mode Flush to zero modifier.
- *
- * @param[out] round_method Pointer to a memory location where the HSA
- * runtime stores the round method used by the implementation. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p fp_type is not a valid
- * floating-point type, or @p flush_mode is not a valid flush to zero modifier,
- * or @p round_method is NULL.
- */
-hsa_status_t HSA_API hsa_isa_get_round_method(
- hsa_isa_t isa,
- hsa_fp_type_t fp_type,
- hsa_flush_mode_t flush_mode,
- hsa_round_method_t *round_method);
-
-/**
- * @brief Wavefront handle
- */
-typedef struct hsa_wavefront_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_wavefront_t;
-
-/**
- * @brief Wavefront attributes.
- */
-typedef enum {
- /**
- * Number of work-items in the wavefront. Must be a power of 2 in the range
- * [1,256]. The type of this attribute is uint32_t.
- */
- HSA_WAVEFRONT_INFO_SIZE = 0
-} hsa_wavefront_info_t;
-
-/**
- * @brief Get the current value of a wavefront attribute.
- *
- * @param[in] wavefront A wavefront.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_WAVEFRONT The wavefront is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * wavefront attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_wavefront_get_info(
- hsa_wavefront_t wavefront,
- hsa_wavefront_info_t attribute,
- void *value);
-
-/**
- * @brief Iterate over the different wavefronts supported by an instruction set
- * architecture, and invoke an application-defined callback on every iteration.
- *
- * @param[in] isa Instruction set architecture.
- *
- * @param[in] callback Callback to be invoked once per wavefront that is
- * supported by the agent. The HSA runtime passes two arguments to the callback:
- * the wavefront handle and the application data. If @p callback returns a
- * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
- * traversal stops and that value is returned.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_isa_iterate_wavefronts(
- hsa_isa_t isa,
- hsa_status_t (*callback)(hsa_wavefront_t wavefront, void *data),
- void *data);
-
-/**
- * @deprecated Use ::hsa_agent_iterate_isas to query which instructions set
- * architectures are supported by a given agent.
- *
- * @brief Check if the instruction set architecture of a code object can be
- * executed on an agent associated with another architecture.
- *
- * @param[in] code_object_isa Instruction set architecture associated with a
- * code object.
- *
- * @param[in] agent_isa Instruction set architecture associated with an agent.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. If the two architectures are compatible, the result
- * is true; if they are incompatible, the result is false.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p code_object_isa or @p agent_isa are
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_compatible(
- hsa_isa_t code_object_isa,
- hsa_isa_t agent_isa,
- bool *result);
-
-/** @} */
-
-
-/** \defgroup executable Executable
- * @{
- */
-
-/**
- * @brief Code object reader handle. A code object reader is used to
- * load a code object from file (when created using
- * ::hsa_code_object_reader_create_from_file), or from memory (if created using
- * ::hsa_code_object_reader_create_from_memory).
- */
-typedef struct hsa_code_object_reader_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_code_object_reader_t;
-
-/**
- * @brief Create a code object reader to operate on a file.
- *
- * @param[in] file File descriptor. The file must have been opened by
- * application with at least read permissions prior calling this function. The
- * file must contain a vendor-specific code object.
- *
- * The file is owned and managed by the application; the lifetime of the file
- * descriptor must exceed that of any associated code object reader.
- *
- * @param[out] code_object_reader Memory location to store the newly created
- * code object reader handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
- */
-hsa_status_t HSA_API hsa_code_object_reader_create_from_file(
- hsa_file_t file,
- hsa_code_object_reader_t *code_object_reader);
-
-/**
- * @brief Create a code object reader to operate on memory.
- *
- * @param[in] code_object Memory buffer that contains a vendor-specific code
- * object. The buffer is owned and managed by the application; the lifetime of
- * the buffer must exceed that of any associated code object reader.
- *
- * @param[in] size Size of the buffer pointed to by @p code_object. Must not be
- * 0.
- *
- * @param[out] code_object_reader Memory location to store newly created code
- * object reader handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object is NULL, @p size
- * is zero, or @p code_object_reader is NULL.
- */
-hsa_status_t HSA_API hsa_code_object_reader_create_from_memory(
- const void *code_object,
- size_t size,
- hsa_code_object_reader_t *code_object_reader);
-
-/**
- * @brief Destroy a code object reader.
- *
- * @details The code object reader handle becomes invalid after completion of
- * this function. Any file or memory used to create the code object read is not
- * closed, removed, or deallocated by this function.
- *
- * @param[in] code_object_reader Code object reader to destroy.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
- * is invalid.
- */
-hsa_status_t HSA_API hsa_code_object_reader_destroy(
- hsa_code_object_reader_t code_object_reader);
-
-/**
- * @brief Struct containing an opaque handle to an executable, which contains
- * ISA for finalized kernels and indirect functions together with the allocated
- * global or readonly segment variables they reference.
- */
-typedef struct hsa_executable_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_executable_t;
-
-/**
- * @brief Executable state.
- */
-typedef enum {
- /**
- * Executable state, which allows the user to load code objects and define
- * external variables. Variable addresses, kernel code handles, and
- * indirect function code handles are not available in query operations until
- * the executable is frozen (zero always returned).
- */
- HSA_EXECUTABLE_STATE_UNFROZEN = 0,
- /**
- * Executable state, which allows the user to query variable addresses,
- * kernel code handles, and indirect function code handles using query
- * operations. Loading new code objects, as well as defining external
- * variables, is not allowed in this state.
- */
- HSA_EXECUTABLE_STATE_FROZEN = 1
-} hsa_executable_state_t;
-
-/**
- * @deprecated Use ::hsa_executable_create_alt instead, which allows the
- * application to specify the default floating-point rounding mode of the
- * executable and assumes an unfrozen initial state.
- *
- * @brief Create an empty executable.
- *
- * @param[in] profile Profile used in the executable.
- *
- * @param[in] executable_state Executable state. If the state is
- * ::HSA_EXECUTABLE_STATE_FROZEN, the resulting executable is useless because no
- * code objects can be loaded, and no variables can be defined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] executable Memory location where the HSA runtime stores the newly
- * created executable handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
- * @p executable is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_create(
- hsa_profile_t profile,
- hsa_executable_state_t executable_state,
- const char *options,
- hsa_executable_t *executable);
-
-/**
- * @brief Create an empty executable.
- *
- * @param[in] profile Profile used in the executable.
- *
- * @param[in] default_float_rounding_mode Default floating-point rounding mode
- * used in the executable. Allowed rounding modes are near and zero (default is
- * not allowed).
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] executable Memory location where the HSA runtime stores newly
- * created executable handle. The initial state of the executable is
- * ::HSA_EXECUTABLE_STATE_UNFROZEN.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
- * @p executable is NULL.
- */
-hsa_status_t HSA_API hsa_executable_create_alt(
- hsa_profile_t profile,
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
- const char *options,
- hsa_executable_t *executable);
-
-/**
- * @brief Destroy an executable.
- *
- * @details An executable handle becomes invalid after the executable has been
- * destroyed. Code object handles that were loaded into this executable are
- * still valid after the executable has been destroyed, and can be used as
- * intended. Resources allocated outside and associated with this executable
- * (such as external global or readonly variables) can be released after the
- * executable has been destroyed.
- *
- * Executable should not be destroyed while kernels are in flight.
- *
- * @param[in] executable Executable.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- */
-hsa_status_t HSA_API hsa_executable_destroy(
- hsa_executable_t executable);
-
-/**
- * @brief Loaded code object handle.
- */
-typedef struct hsa_loaded_code_object_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_loaded_code_object_t;
-
-/**
- * @brief Load a program code object into an executable.
- *
- * @details A program code object contains information about resources that are
- * accessible by all kernel agents that run the executable, and can be loaded
- * at most once into an executable.
- *
- * If the program code object uses extensions, the implementation must support
- * them for this operation to return successfully.
- *
- * @param[in] executable Executable.
- *
- * @param[in] code_object_reader A code object reader that holds the program
- * code object to load. If a code object reader is destroyed before all the
- * associated executables are destroyed, the behavior is undefined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] loaded_code_object Pointer to a memory location where the HSA
- * runtime stores the loaded code object handle. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
- * is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The program code object is
- * not compatible with the executable or the implementation (for example, the
- * code object uses an extension that is not supported by the implementation).
- */
-hsa_status_t HSA_API hsa_executable_load_program_code_object(
- hsa_executable_t executable,
- hsa_code_object_reader_t code_object_reader,
- const char *options,
- hsa_loaded_code_object_t *loaded_code_object);
-
-/**
- * @brief Load an agent code object into an executable.
- *
- * @details The agent code object contains all defined agent
- * allocation variables, functions, indirect functions, and kernels in a given
- * program for a given instruction set architecture.
- *
- * Any module linkage declaration must have been defined either by a define
- * variable or by loading a code object that has a symbol with module linkage
- * definition.
- *
- * The default floating-point rounding mode of the code object associated with
- * @p code_object_reader must match that of the executable
- * (::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE), or be default (in which
- * case the value of ::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE is used).
- * If the agent code object uses extensions, the implementation and the agent
- * must support them for this operation to return successfully.
- *
- * @param[in] executable Executable.
- *
- * @param[in] agent Agent to load code object for. A code object can be loaded
- * into an executable at most once for a given agent. The instruction set
- * architecture of the code object must be supported by the agent.
- *
- * @param[in] code_object_reader A code object reader that holds the code object
- * to load. If a code object reader is destroyed before all the associated
- * executables are destroyed, the behavior is undefined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] loaded_code_object Pointer to a memory location where the HSA
- * runtime stores the loaded code object handle. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
- * is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The code object read by @p
- * code_object_reader is not compatible with the agent (for example, the agent
- * does not support the instruction set architecture of the code object), the
- * executable (for example, there is a default floating-point mode mismatch
- * between the two), or the implementation.
- */
-hsa_status_t HSA_API hsa_executable_load_agent_code_object(
- hsa_executable_t executable,
- hsa_agent_t agent,
- hsa_code_object_reader_t code_object_reader,
- const char *options,
- hsa_loaded_code_object_t *loaded_code_object);
-
-/**
- * @brief Freeze the executable.
- *
- * @details No modifications to executable can be made after freezing: no code
- * objects can be loaded to the executable, and no external variables can be
- * defined. Freezing the executable does not prevent querying the executable's
- * attributes. The application must define all the external variables in an
- * executable before freezing it.
- *
- * @param[in] executable Executable.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_UNDEFINED One or more variables are
- * undefined in the executable.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is already frozen.
- */
-hsa_status_t HSA_API hsa_executable_freeze(
- hsa_executable_t executable,
- const char *options);
-
-/**
- * @brief Executable attributes.
- */
-typedef enum {
- /**
- * Profile this executable is created for. The type of this attribute is
- * ::hsa_profile_t.
- */
- HSA_EXECUTABLE_INFO_PROFILE = 1,
- /**
- * Executable state. The type of this attribute is ::hsa_executable_state_t.
- */
- HSA_EXECUTABLE_INFO_STATE = 2,
- /**
- * Default floating-point rounding mode specified when executable was created.
- * The type of this attribute is ::hsa_default_float_rounding_mode_t.
- */
- HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 3
-} hsa_executable_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given executable.
- *
- * @param[in] executable Executable.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * executable attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_executable_get_info(
- hsa_executable_t executable,
- hsa_executable_info_t attribute,
- void *value);
-
-/**
- * @brief Define an external global variable with program allocation.
- *
- * @details This function allows the application to provide the definition
- * of a variable in the global segment memory with program allocation. The
- * variable must be defined before loading a code object into an executable.
- * In addition, code objects loaded must not define the variable.
- *
- * @param[in] executable Executable. Must not be in frozen state.
- *
- * @param[in] variable_name Name of the variable. The Programmer's Reference
- * Manual describes the standard name mangling scheme.
- *
- * @param[in] address Address where the variable is defined. This address must
- * be in global memory and can be read and written by any agent in the
- * system. The application cannot deallocate the buffer pointed by @p address
- * before @p executable is destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
- * already defined.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
- * @p variable_name.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
- */
-hsa_status_t HSA_API hsa_executable_global_variable_define(
- hsa_executable_t executable,
- const char *variable_name,
- void *address);
-
-/**
- * @brief Define an external global variable with agent allocation.
- *
- * @details This function allows the application to provide the definition
- * of a variable in the global segment memory with agent allocation. The
- * variable must be defined before loading a code object into an executable.
- * In addition, code objects loaded must not define the variable.
- *
- * @param[in] executable Executable. Must not be in frozen state.
- *
- * @param[in] agent Agent for which the variable is being defined.
- *
- * @param[in] variable_name Name of the variable. The Programmer's Reference
- * Manual describes the standard name mangling scheme.
- *
- * @param[in] address Address where the variable is defined. This address must
- * have been previously allocated using ::hsa_memory_allocate in a global region
- * that is only visible to @p agent. The application cannot deallocate the
- * buffer pointed by @p address before @p executable is destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
- * already defined.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
- * @p variable_name.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
- */
-hsa_status_t HSA_API hsa_executable_agent_global_variable_define(
- hsa_executable_t executable,
- hsa_agent_t agent,
- const char *variable_name,
- void *address);
-
-/**
- * @brief Define an external readonly variable.
- *
- * @details This function allows the application to provide the definition
- * of a variable in the readonly segment memory. The variable must be defined
- * before loading a code object into an executable. In addition, code objects
- * loaded must not define the variable.
- *
- * @param[in] executable Executable. Must not be in frozen state.
- *
- * @param[in] agent Agent for which the variable is being defined.
- *
- * @param[in] variable_name Name of the variable. The Programmer's Reference
- * Manual describes the standard name mangling scheme.
- *
- * @param[in] address Address where the variable is defined. This address must
- * have been previously allocated using ::hsa_memory_allocate in a readonly
- * region associated with @p agent. The application cannot deallocate the buffer
- * pointed by @p address before @p executable is destroyed.
- *
- * @param[in] address Address where the variable is defined. The buffer pointed
- * by @p address is owned by the application, and cannot be deallocated before
- * @p executable is destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE Executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
- * already defined.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
- * @p variable_name.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
- */
-hsa_status_t HSA_API hsa_executable_readonly_variable_define(
- hsa_executable_t executable,
- hsa_agent_t agent,
- const char *variable_name,
- void *address);
-
-/**
- * @brief Validate an executable. Checks that all code objects have matching
- * machine model, profile, and default floating-point rounding mode. Checks that
- * all declarations have definitions. Checks declaration-definition
- * compatibility (see the HSA Programming Reference Manual for compatibility
- * rules). Invoking this function is equivalent to invoking
- * ::hsa_executable_validate_alt with no options.
- *
- * @param[in] executable Executable. Must be in frozen state.
- *
- * @param[out] result Memory location where the HSA runtime stores the
- * validation result. If the executable passes validation, the result is 0.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API hsa_executable_validate(
- hsa_executable_t executable,
- uint32_t *result);
-
-/**
- * @brief Validate an executable. Checks that all code objects have matching
- * machine model, profile, and default floating-point rounding mode. Checks that
- * all declarations have definitions. Checks declaration-definition
- * compatibility (see the HSA Programming Reference Manual for compatibility
- * rules).
- *
- * @param[in] executable Executable. Must be in frozen state.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] result Memory location where the HSA runtime stores the
- * validation result. If the executable passes validation, the result is 0.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API hsa_executable_validate_alt(
- hsa_executable_t executable,
- const char *options,
- uint32_t *result);
-
-/**
- * @brief Executable symbol handle.
- *
- * The lifetime of an executable object symbol matches that of the executable
- * associated with it. An operation on a symbol whose associated executable has
- * been destroyed results in undefined behavior.
- */
-typedef struct hsa_executable_symbol_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_executable_symbol_t;
-
-/**
- * @deprecated Use ::hsa_executable_get_symbol_by_name instead.
- *
- * @brief Get the symbol handle for a given a symbol name.
- *
- * @param[in] executable Executable.
- *
- * @param[in] module_name Module name. Must be NULL if the symbol has
- * program linkage.
- *
- * @param[in] symbol_name Symbol name.
- *
- * @param[in] agent Agent associated with the symbol. If the symbol is
- * independent of any agent (for example, a variable with program
- * allocation), this argument is ignored.
- *
- * @param[in] call_convention Call convention associated with the symbol. If the
- * symbol does not correspond to an indirect function, this argument is ignored.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
- * @p symbol is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_get_symbol(
- hsa_executable_t executable,
- const char *module_name,
- const char *symbol_name,
- hsa_agent_t agent,
- int32_t call_convention,
- hsa_executable_symbol_t *symbol);
-
-/**
- * @brief Retrieve the symbol handle corresponding to a given a symbol name.
- *
- * @param[in] executable Executable.
- *
- * @param[in] symbol_name Symbol name. Must be a NUL-terminated character
- * array. The Programmer's Reference Manual describes the standard name mangling
- * scheme.
- *
- * @param[in] agent Pointer to the agent for which the symbol with the given
- * name is defined. If the symbol corresponding to the given name has program
- * allocation, @p agent must be NULL.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or @p
- * symbol is NULL.
- */
-hsa_status_t HSA_API hsa_executable_get_symbol_by_name(
- hsa_executable_t executable,
- const char *symbol_name,
- const hsa_agent_t *agent,
- hsa_executable_symbol_t *symbol);
-
-/**
- * @brief Symbol type.
- */
-typedef enum {
- /**
- * Variable.
- */
- HSA_SYMBOL_KIND_VARIABLE = 0,
- /**
- * Kernel.
- */
- HSA_SYMBOL_KIND_KERNEL = 1,
- /**
- * Indirect function.
- */
- HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2
-} hsa_symbol_kind_t;
-
-/**
- * @brief Linkage type of a symbol.
- */
-typedef enum {
- /**
- * Module linkage.
- */
- HSA_SYMBOL_LINKAGE_MODULE = 0,
- /**
- * Program linkage.
- */
- HSA_SYMBOL_LINKAGE_PROGRAM = 1
-} hsa_symbol_linkage_t;
-
-/**
- * @brief Allocation type of a variable.
- */
-typedef enum {
- /**
- * Agent allocation.
- */
- HSA_VARIABLE_ALLOCATION_AGENT = 0,
- /**
- * Program allocation.
- */
- HSA_VARIABLE_ALLOCATION_PROGRAM = 1
-} hsa_variable_allocation_t;
-
-/**
- * @brief Memory segment associated with a variable.
- */
-typedef enum {
- /**
- * Global memory segment.
- */
- HSA_VARIABLE_SEGMENT_GLOBAL = 0,
- /**
- * Readonly memory segment.
- */
- HSA_VARIABLE_SEGMENT_READONLY = 1
-} hsa_variable_segment_t;
-
-/**
- * @brief Executable symbol attributes.
- */
-typedef enum {
- /**
- * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0,
- /**
- * The length of the symbol name in bytes, not including the NUL terminator.
- * The type of this attribute is uint32_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1,
- /**
- * The name of the symbol. The type of this attribute is character array with
- * the length equal to the value of ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH
- * attribute.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2,
- /**
- * @deprecated
- *
- * The length of the module name in bytes (not including the NUL terminator)
- * to which this symbol belongs if this symbol has module linkage, otherwise 0
- * is returned. The type of this attribute is uint32_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
- /**
- * @deprecated
- *
- * The module name to which this symbol belongs if this symbol has module
- * linkage, otherwise an empty string is returned. The type of this attribute
- * is character array with the length equal to the value of
- * ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4,
- /**
- * @deprecated
- *
- * Agent associated with this symbol. If the symbol is a variable, the
- * value of this attribute is only defined if
- * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is
- * ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is hsa_agent_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20,
- /**
- * The address of the variable. The value of this attribute is undefined if
- * the symbol is not a variable. The type of this attribute is uint64_t.
- *
- * If executable's state is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 is
- * returned.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21,
- /**
- * The linkage kind of the symbol. The type of this attribute is
- * ::hsa_symbol_linkage_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5,
- /**
- * Indicates whether the symbol corresponds to a definition. The type of this
- * attribute is bool.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17,
- /**
- * @deprecated
- *
- * The allocation kind of the variable. The value of this attribute is
- * undefined if the symbol is not a variable. The type of this attribute is
- * ::hsa_variable_allocation_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
- /**
- * @deprecated
- *
- * The segment kind of the variable. The value of this attribute is undefined
- * if the symbol is not a variable. The type of this attribute is
- * ::hsa_variable_segment_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
- /**
- * @deprecated
- *
- * Alignment of the symbol in memory. The value of this attribute is undefined
- * if the symbol is not a variable. The type of this attribute is uint32_t.
- *
- * The current alignment of the variable in memory may be greater than the
- * value specified in the source program variable declaration.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
- /**
- * @deprecated
- *
- * Size of the variable. The value of this attribute is undefined if
- * the symbol is not a variable. The type of this attribute is uint32_t.
- *
- * A value of 0 is returned if the variable is an external variable and has an
- * unknown dimension.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9,
- /**
- * @deprecated
- *
- * Indicates whether the variable is constant. The value of this attribute is
- * undefined if the symbol is not a variable. The type of this attribute is
- * bool.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
- /**
- * Kernel object handle, used in the kernel dispatch packet. The value of this
- * attribute is undefined if the symbol is not a kernel. The type of this
- * attribute is uint64_t.
- *
- * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
- * is returned.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22,
- /**
- * Size of kernarg segment memory that is required to hold the values of the
- * kernel arguments, in bytes. Must be a multiple of 16. The value of this
- * attribute is undefined if the symbol is not a kernel. The type of this
- * attribute is uint32_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
- /**
- * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
- * which is the maximum of 16 and the maximum alignment of any of the kernel
- * arguments. The value of this attribute is undefined if the symbol is not a
- * kernel. The type of this attribute is uint32_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
- /**
- * Size of static group segment memory required by the kernel (per
- * work-group), in bytes. The value of this attribute is undefined
- * if the symbol is not a kernel. The type of this attribute is uint32_t.
- *
- * The reported amount does not include any dynamically allocated group
- * segment memory that may be requested by the application when a kernel is
- * dispatched.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
- /**
- * Size of static private, spill, and arg segment memory required by
- * this kernel (per work-item), in bytes. The value of this attribute is
- * undefined if the symbol is not a kernel. The type of this attribute is
- * uint32_t.
- *
- * If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is
- * true, the kernel may use more private memory than the reported value, and
- * the application must add the dynamic call stack usage to @a
- * private_segment_size when populating a kernel dispatch packet.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
- /**
- * Dynamic callstack flag. The value of this attribute is undefined if the
- * symbol is not a kernel. The type of this attribute is bool.
- *
- * If this flag is set (the value is true), the kernel uses a dynamically
- * sized call stack. This can happen if recursive calls, calls to indirect
- * functions, or the HSAIL alloca instruction are present in the kernel.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
- /**
- * @deprecated
- *
- * Call convention of the kernel. The value of this attribute is undefined if
- * the symbol is not a kernel. The type of this attribute is uint32_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18,
- /**
- * Indirect function object handle. The value of this attribute is undefined
- * if the symbol is not an indirect function, or the associated agent does
- * not support the Full Profile. The type of this attribute depends on the
- * machine model: the type is uint32_t for small machine model, and uint64_t
- * for large model.
- *
- * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
- * is returned.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23,
- /**
- * @deprecated
- *
- * Call convention of the indirect function. The value of this attribute is
- * undefined if the symbol is not an indirect function, or the associated
- * agent does not support the Full Profile. The type of this attribute is
- * uint32_t.
- */
- HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
-} hsa_executable_symbol_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given executable symbol.
- *
- * @param[in] executable_symbol Executable symbol.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL The executable symbol is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * executable symbol attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_executable_symbol_get_info(
- hsa_executable_symbol_t executable_symbol,
- hsa_executable_symbol_info_t attribute,
- void *value);
-
-/**
- * @deprecated
- *
- * @brief Iterate over the symbols in a executable, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] callback Callback to be invoked once per executable symbol. The
- * HSA runtime passes three arguments to the callback: the executable, a symbol,
- * and the application data. If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_executable_iterate_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_iterate_symbols(
- hsa_executable_t executable,
- hsa_status_t (*callback)(hsa_executable_t exec,
- hsa_executable_symbol_t symbol,
- void *data),
- void *data);
-
-/**
- * @brief Iterate over the kernels, indirect functions, and agent allocation
- * variables in an executable for a given agent, and invoke an application-
- * defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] agent Agent.
- *
- * @param[in] callback Callback to be invoked once per executable symbol. The
- * HSA runtime passes three arguments to the callback: the executable, a symbol,
- * and the application data. If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_executable_iterate_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_executable_iterate_agent_symbols(
- hsa_executable_t executable,
- hsa_agent_t agent,
- hsa_status_t (*callback)(hsa_executable_t exec,
- hsa_agent_t agent,
- hsa_executable_symbol_t symbol,
- void *data),
- void *data);
-
-/**
- * @brief Iterate over the program allocation variables in an executable, and
- * invoke an application-defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] callback Callback to be invoked once per executable symbol. The
- * HSA runtime passes three arguments to the callback: the executable, a symbol,
- * and the application data. If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_executable_iterate_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_executable_iterate_program_symbols(
- hsa_executable_t executable,
- hsa_status_t (*callback)(hsa_executable_t exec,
- hsa_executable_symbol_t symbol,
- void *data),
- void *data);
-
-/** @} */
-
-
-/** \defgroup code-object Code Objects (deprecated).
- * @{
- */
-
-/**
- * @deprecated
- *
- * @brief Struct containing an opaque handle to a code object, which contains
- * ISA for finalized kernels and indirect functions together with information
- * about the global or readonly segment variables they reference.
- */
-typedef struct hsa_code_object_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_code_object_t;
-
-/**
- * @deprecated
- *
- * @brief Application data handle that is passed to the serialization
- * and deserialization functions.
- */
-typedef struct hsa_callback_data_s {
- /**
- * Opaque handle.
- */
- uint64_t handle;
-} hsa_callback_data_t;
-
-/**
- * @deprecated
- *
- * @brief Serialize a code object. Can be used for offline finalization,
- * install-time finalization, disk code caching, etc.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] alloc_callback Callback function for memory allocation. Must not
- * be NULL. The HSA runtime passes three arguments to the callback: the
- * allocation size, the application data, and a pointer to a memory location
- * where the application stores the allocation result. The HSA runtime invokes
- * @p alloc_callback once to allocate a buffer that contains the serialized
- * version of @p code_object. If the callback returns a status code other than
- * ::HSA_STATUS_SUCCESS, this function returns the same code.
- *
- * @param[in] callback_data Application data that is passed to @p
- * alloc_callback. May be NULL.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] serialized_code_object Memory location where the HSA runtime
- * stores a pointer to the serialized code object. Must not be NULL.
- *
- * @param[out] serialized_code_object_size Memory location where the HSA runtime
- * stores the size (in bytes) of @p serialized_code_object. The returned value
- * matches the allocation size passed by the HSA runtime to @p
- * alloc_callback. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p alloc_callback, @p
- * serialized_code_object, or @p serialized_code_object_size are NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_serialize(
- hsa_code_object_t code_object,
- hsa_status_t (*alloc_callback)(size_t size,
- hsa_callback_data_t data,
- void **address),
- hsa_callback_data_t callback_data,
- const char *options,
- void **serialized_code_object,
- size_t *serialized_code_object_size);
-
-/**
- * @deprecated
- *
- * @brief Deserialize a code object.
- *
- * @param[in] serialized_code_object A serialized code object. Must not be NULL.
- *
- * @param[in] serialized_code_object_size The size (in bytes) of @p
- * serialized_code_object. Must not be 0.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] code_object Memory location where the HSA runtime stores the
- * deserialized code object.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p serialized_code_object, or @p
- * code_object are NULL, or @p serialized_code_object_size is 0.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_deserialize(
- void *serialized_code_object,
- size_t serialized_code_object_size,
- const char *options,
- hsa_code_object_t *code_object);
-
-/**
- * @deprecated
- *
- * @brief Destroy a code object.
- *
- * @details The lifetime of a code object must exceed that of any executable
- * where it has been loaded. If an executable that loaded @p code_object has not
- * been destroyed, the behavior is undefined.
- *
- * @param[in] code_object Code object. The handle becomes invalid after it has
- * been destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_destroy(
- hsa_code_object_t code_object);
-
-/**
- * @deprecated
- *
- * @brief Code object type.
- */
-typedef enum {
- /**
- * Produces code object that contains ISA for all kernels and indirect
- * functions in HSA source.
- */
- HSA_CODE_OBJECT_TYPE_PROGRAM = 0
-} hsa_code_object_type_t;
-
-/**
- * @deprecated
- *
- * @brief Code object attributes.
- */
-typedef enum {
- /**
- * The version of the code object. The type of this attribute is a
- * NUL-terminated char[64]. The name must be at most 63 characters long (not
- * including the NUL terminator) and all array elements not used for the name
- * must be NUL.
- */
- HSA_CODE_OBJECT_INFO_VERSION = 0,
- /**
- * Type of code object. The type of this attribute is
- * ::hsa_code_object_type_t.
- */
- HSA_CODE_OBJECT_INFO_TYPE = 1,
- /**
- * Instruction set architecture this code object is produced for. The type of
- * this attribute is ::hsa_isa_t.
- */
- HSA_CODE_OBJECT_INFO_ISA = 2,
- /**
- * Machine model this code object is produced for. The type of this attribute
- * is ::hsa_machine_model_t.
- */
- HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3,
- /**
- * Profile this code object is produced for. The type of this attribute is
- * ::hsa_profile_t.
- */
- HSA_CODE_OBJECT_INFO_PROFILE = 4,
- /**
- * Default floating-point rounding mode used when the code object is
- * produced. The type of this attribute is
- * ::hsa_default_float_rounding_mode_t.
- */
- HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5
-} hsa_code_object_info_t;
-
-/**
- * @deprecated
- *
- * @brief Get the current value of an attribute for a given code object.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * code object attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_info(
- hsa_code_object_t code_object,
- hsa_code_object_info_t attribute,
- void *value);
-
-/**
- * @deprecated
- *
- * @brief Load code object into the executable.
- *
- * @details Every global or readonly variable that is external must be defined
- * before loading the code object. An internal global or readonly variable is
- * allocated once the code object, that is being loaded, references this
- * variable and this variable is not allocated.
- *
- * Any module linkage declaration must have been defined either by a define
- * variable or by loading a code object that has a symbol with module linkage
- * definition.
- *
- * @param[in] executable Executable.
- *
- * @param[in] agent Agent to load code object for. The agent must support the
- * default floating-point rounding mode used by @p code_object.
- *
- * @param[in] code_object Code object to load. The lifetime of the code object
- * must exceed that of the executable: if @p code_object is destroyed before @p
- * executable, the behavior is undefined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p agent is not compatible
- * with @p code_object (for example, @p agent does not support the default
- * floating-point rounding mode specified by @p code_object), or @p code_object
- * is not compatible with @p executable (for example, @p code_object and @p
- * executable have different machine models or profiles).
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_load_code_object(
- hsa_executable_t executable,
- hsa_agent_t agent,
- hsa_code_object_t code_object,
- const char *options);
-
-/**
- * @deprecated
- *
- * @brief Code object symbol handle.
- *
- * The lifetime of a code object symbol matches that of the code object
- * associated with it. An operation on a symbol whose associated code object has
- * been destroyed results in undefined behavior.
- */
-typedef struct hsa_code_symbol_s {
- /**
- * Opaque handle. Two handles reference the same object of the enclosing type
- * if and only if they are equal.
- */
- uint64_t handle;
-} hsa_code_symbol_t;
-
-/**
- * @deprecated
- *
- * @brief Get the symbol handle within a code object for a given a symbol name.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] symbol_name Symbol name.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
- * @p symbol is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol(
- hsa_code_object_t code_object,
- const char *symbol_name,
- hsa_code_symbol_t *symbol);
-
-/**
- * @deprecated
- *
- * @brief Get the symbol handle within a code object for a given a symbol name.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] module_name Module name. Must be NULL if the symbol has
- * program linkage.
- *
- * @param[in] symbol_name Symbol name.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
- * @p symbol is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol_from_name(
- hsa_code_object_t code_object,
- const char *module_name,
- const char *symbol_name,
- hsa_code_symbol_t *symbol);
-
-/**
- * @deprecated
- *
- * @brief Code object symbol attributes.
- */
-typedef enum {
- /**
- * The type of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
- */
- HSA_CODE_SYMBOL_INFO_TYPE = 0,
- /**
- * The length of the symbol name in bytes, not including the NUL terminator.
- * The type of this attribute is uint32_t.
- */
- HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1,
- /**
- * The name of the symbol. The type of this attribute is character array with
- * the length equal to the value of ::HSA_CODE_SYMBOL_INFO_NAME_LENGTH
- * attribute.
- */
- HSA_CODE_SYMBOL_INFO_NAME = 2,
- /**
- * The length of the module name in bytes (not including the NUL terminator)
- * to which this symbol belongs if this symbol has module linkage, otherwise 0
- * is returned. The type of this attribute is uint32_t.
- */
- HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
- /**
- * The module name to which this symbol belongs if this symbol has module
- * linkage, otherwise an empty string is returned. The type of this attribute
- * is character array with the length equal to the value of
- * ::HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
- */
- HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4,
- /**
- * The linkage kind of the symbol. The type of this attribute is
- * ::hsa_symbol_linkage_t.
- */
- HSA_CODE_SYMBOL_INFO_LINKAGE = 5,
- /**
- * Indicates whether the symbol corresponds to a definition. The type of this
- * attribute is bool.
- */
- HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17,
- /**
- * The allocation kind of the variable. The value of this attribute is
- * undefined if the symbol is not a variable. The type of this attribute is
- * ::hsa_variable_allocation_t.
- */
- HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
- /**
- * The segment kind of the variable. The value of this attribute is
- * undefined if the symbol is not a variable. The type of this attribute is
- * ::hsa_variable_segment_t.
- */
- HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
- /**
- * Alignment of the symbol in memory. The value of this attribute is undefined
- * if the symbol is not a variable. The type of this attribute is uint32_t.
- *
- * The current alignment of the variable in memory may be greater than the
- * value specified in the source program variable declaration.
- */
- HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
- /**
- * Size of the variable. The value of this attribute is undefined if the
- * symbol is not a variable. The type of this attribute is uint32_t.
- *
- * A size of 0 is returned if the variable is an external variable and has an
- * unknown dimension.
- */
- HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9,
- /**
- * Indicates whether the variable is constant. The value of this attribute is
- * undefined if the symbol is not a variable. The type of this attribute is
- * bool.
- */
- HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
- /**
- * Size of kernarg segment memory that is required to hold the values of the
- * kernel arguments, in bytes. Must be a multiple of 16. The value of this
- * attribute is undefined if the symbol is not a kernel. The type of this
- * attribute is uint32_t.
- */
- HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
- /**
- * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
- * which is the maximum of 16 and the maximum alignment of any of the kernel
- * arguments. The value of this attribute is undefined if the symbol is not a
- * kernel. The type of this attribute is uint32_t.
- */
- HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
- /**
- * Size of static group segment memory required by the kernel (per
- * work-group), in bytes. The value of this attribute is undefined
- * if the symbol is not a kernel. The type of this attribute is uint32_t.
- *
- * The reported amount does not include any dynamically allocated group
- * segment memory that may be requested by the application when a kernel is
- * dispatched.
- */
- HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
- /**
- * Size of static private, spill, and arg segment memory required by
- * this kernel (per work-item), in bytes. The value of this attribute is
- * undefined if the symbol is not a kernel. The type of this attribute is
- * uint32_t.
- *
- * If the value of ::HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is true,
- * the kernel may use more private memory than the reported value, and the
- * application must add the dynamic call stack usage to @a
- * private_segment_size when populating a kernel dispatch packet.
- */
- HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
- /**
- * Dynamic callstack flag. The value of this attribute is undefined if the
- * symbol is not a kernel. The type of this attribute is bool.
- *
- * If this flag is set (the value is true), the kernel uses a dynamically
- * sized call stack. This can happen if recursive calls, calls to indirect
- * functions, or the HSAIL alloca instruction are present in the kernel.
- */
- HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
- /**
- * Call convention of the kernel. The value of this attribute is undefined if
- * the symbol is not a kernel. The type of this attribute is uint32_t.
- */
- HSA_CODE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18,
- /**
- * Call convention of the indirect function. The value of this attribute is
- * undefined if the symbol is not an indirect function. The type of this
- * attribute is uint32_t.
- */
- HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
-} hsa_code_symbol_info_t;
-
-/**
- * @deprecated
- *
- * @brief Get the current value of an attribute for a given code symbol.
- *
- * @param[in] code_symbol Code symbol.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_SYMBOL The code symbol is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * code symbol attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_symbol_get_info(
- hsa_code_symbol_t code_symbol,
- hsa_code_symbol_info_t attribute,
- void *value);
-
-/**
- * @deprecated
- *
- * @brief Iterate over the symbols in a code object, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] callback Callback to be invoked once per code object symbol. The
- * HSA runtime passes three arguments to the callback: the code object, a
- * symbol, and the application data. If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_code_object_iterate_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_iterate_symbols(
- hsa_code_object_t code_object,
- hsa_status_t (*callback)(hsa_code_object_t code_object,
- hsa_code_symbol_t symbol,
- void *data),
- void *data);
-
-/** @} */
-
-#ifdef __cplusplus
-} // end extern "C" block
-#endif
-
-#endif // header guard
diff --git a/third_party/rocm/include/hsa/hsa_api_trace.h b/third_party/rocm/include/hsa/hsa_api_trace.h
deleted file mode 100644
index 5c33f07..0000000
--- a/third_party/rocm/include/hsa/hsa_api_trace.h
+++ /dev/null
@@ -1,474 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
-#define HSA_RUNTIME_INC_HSA_API_TRACE_H
-
-#include "hsa.h"
-#ifdef AMD_INTERNAL_BUILD
-#include "hsa_ext_image.h"
-#include "hsa_ext_amd.h"
-#include "hsa_ext_finalize.h"
-#else
-#include "inc/hsa_ext_image.h"
-#include "inc/hsa_ext_amd.h"
-#include "inc/hsa_ext_finalize.h"
-#endif
-
-#include <string.h>
-#include <assert.h>
-#include <stddef.h>
-
-// Major Ids of the Api tables exported by Hsa Core Runtime
-#define HSA_API_TABLE_MAJOR_VERSION 0x01
-#define HSA_CORE_API_TABLE_MAJOR_VERSION 0x01
-#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION 0x01
-#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION 0x01
-#define HSA_IMAGE_API_TABLE_MAJOR_VERSION 0x01
-#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION 0x01
-
-// Step Ids of the Api tables exported by Hsa Core Runtime
-#define HSA_API_TABLE_STEP_VERSION 0x00
-#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
-#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x00
-#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
-#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x00
-#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION 0x00
-
-// Min function used to copy Api Tables
-static inline uint32_t Min(const uint32_t a, const uint32_t b) {
- return (a > b) ? b : a;
-}
-
-// Declarations of APIs intended for use only by tools.
-typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* pkts, uint64_t pkt_count);
-typedef void (*hsa_amd_queue_intercept_handler)(const void* pkts, uint64_t pkt_count,
- uint64_t user_pkt_index, void* data,
- hsa_amd_queue_intercept_packet_writer writer);
-hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
- hsa_amd_queue_intercept_handler callback,
- void* user_data);
-hsa_status_t hsa_amd_queue_intercept_create(
- hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
- void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
- uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
-
-typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent,
- void* data);
-hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
- void* user_data);
-
-// Structure of Version used to identify an instance of Api table
-// Must be the first member (offsetof == 0) of all API tables.
-// This is the root of the table passing ABI.
-struct ApiTableVersion {
- uint32_t major_id;
- uint32_t minor_id;
- uint32_t step_id;
- uint32_t reserved;
-};
-
-// Table to export HSA Finalizer Extension Apis
-struct FinalizerExtTable {
- ApiTableVersion version;
- decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
- decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
- decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
- decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
- decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
- decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
-};
-
-// Table to export HSA Image Extension Apis
-struct ImageExtTable {
- ApiTableVersion version;
- decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
- decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
- decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
- decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
- decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
- decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
- decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
- decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
- decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
- decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
- decltype(hsa_ext_image_get_capability_with_layout)* hsa_ext_image_get_capability_with_layout_fn;
- decltype(hsa_ext_image_data_get_info_with_layout)* hsa_ext_image_data_get_info_with_layout_fn;
- decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
-};
-
-// Table to export AMD Extension Apis
-struct AmdExtTable {
- ApiTableVersion version;
- decltype(hsa_amd_coherency_get_type)* hsa_amd_coherency_get_type_fn;
- decltype(hsa_amd_coherency_set_type)* hsa_amd_coherency_set_type_fn;
- decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled_fn;
- decltype(hsa_amd_profiling_async_copy_enable) *hsa_amd_profiling_async_copy_enable_fn;
- decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time_fn;
- decltype(hsa_amd_profiling_get_async_copy_time) *hsa_amd_profiling_get_async_copy_time_fn;
- decltype(hsa_amd_profiling_convert_tick_to_system_domain)* hsa_amd_profiling_convert_tick_to_system_domain_fn;
- decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler_fn;
- decltype(hsa_amd_async_function)* hsa_amd_async_function_fn;
- decltype(hsa_amd_signal_wait_any)* hsa_amd_signal_wait_any_fn;
- decltype(hsa_amd_queue_cu_set_mask)* hsa_amd_queue_cu_set_mask_fn;
- decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info_fn;
- decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools_fn;
- decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate_fn;
- decltype(hsa_amd_memory_pool_free)* hsa_amd_memory_pool_free_fn;
- decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
- decltype(hsa_amd_agent_memory_pool_get_info)* hsa_amd_agent_memory_pool_get_info_fn;
- decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access_fn;
- decltype(hsa_amd_memory_pool_can_migrate)* hsa_amd_memory_pool_can_migrate_fn;
- decltype(hsa_amd_memory_migrate)* hsa_amd_memory_migrate_fn;
- decltype(hsa_amd_memory_lock)* hsa_amd_memory_lock_fn;
- decltype(hsa_amd_memory_unlock)* hsa_amd_memory_unlock_fn;
- decltype(hsa_amd_memory_fill)* hsa_amd_memory_fill_fn;
- decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
- decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
- decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
- decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
- decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
- decltype(hsa_amd_ipc_memory_create)* hsa_amd_ipc_memory_create_fn;
- decltype(hsa_amd_ipc_memory_attach)* hsa_amd_ipc_memory_attach_fn;
- decltype(hsa_amd_ipc_memory_detach)* hsa_amd_ipc_memory_detach_fn;
- decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn;
- decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn;
- decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn;
- decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn;
- decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
- decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
- decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
- decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
- decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
- decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
- decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
- decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
-};
-
-// Table to export HSA Core Runtime Apis
-struct CoreApiTable {
- ApiTableVersion version;
- decltype(hsa_init)* hsa_init_fn;
- decltype(hsa_shut_down)* hsa_shut_down_fn;
- decltype(hsa_system_get_info)* hsa_system_get_info_fn;
- decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
- decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
- decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
- decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;
- decltype(hsa_queue_create)* hsa_queue_create_fn;
- decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
- decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
- decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
- decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
- decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
- decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
- decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
- decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
- decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
- decltype(hsa_queue_cas_write_index_scacq_screl)* hsa_queue_cas_write_index_scacq_screl_fn;
- decltype(hsa_queue_cas_write_index_scacquire)* hsa_queue_cas_write_index_scacquire_fn;
- decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
- decltype(hsa_queue_cas_write_index_screlease)* hsa_queue_cas_write_index_screlease_fn;
- decltype(hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl_fn;
- decltype(hsa_queue_add_write_index_scacquire)* hsa_queue_add_write_index_scacquire_fn;
- decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
- decltype(hsa_queue_add_write_index_screlease)* hsa_queue_add_write_index_screlease_fn;
- decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
- decltype(hsa_queue_store_read_index_screlease)* hsa_queue_store_read_index_screlease_fn;
- decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
- decltype(hsa_region_get_info)* hsa_region_get_info_fn;
- decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
- decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
- decltype(hsa_memory_register)* hsa_memory_register_fn;
- decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
- decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
- decltype(hsa_memory_free)* hsa_memory_free_fn;
- decltype(hsa_memory_copy)* hsa_memory_copy_fn;
- decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
- decltype(hsa_signal_create)* hsa_signal_create_fn;
- decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
- decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
- decltype(hsa_signal_load_scacquire)* hsa_signal_load_scacquire_fn;
- decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
- decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease_fn;
- decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
- decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire_fn;
- decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
- decltype(hsa_signal_and_scacquire)* hsa_signal_and_scacquire_fn;
- decltype(hsa_signal_and_screlease)* hsa_signal_and_screlease_fn;
- decltype(hsa_signal_and_scacq_screl)* hsa_signal_and_scacq_screl_fn;
- decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
- decltype(hsa_signal_or_scacquire)* hsa_signal_or_scacquire_fn;
- decltype(hsa_signal_or_screlease)* hsa_signal_or_screlease_fn;
- decltype(hsa_signal_or_scacq_screl)* hsa_signal_or_scacq_screl_fn;
- decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
- decltype(hsa_signal_xor_scacquire)* hsa_signal_xor_scacquire_fn;
- decltype(hsa_signal_xor_screlease)* hsa_signal_xor_screlease_fn;
- decltype(hsa_signal_xor_scacq_screl)* hsa_signal_xor_scacq_screl_fn;
- decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
- decltype(hsa_signal_exchange_scacquire)* hsa_signal_exchange_scacquire_fn;
- decltype(hsa_signal_exchange_screlease)* hsa_signal_exchange_screlease_fn;
- decltype(hsa_signal_exchange_scacq_screl)* hsa_signal_exchange_scacq_screl_fn;
- decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
- decltype(hsa_signal_add_scacquire)* hsa_signal_add_scacquire_fn;
- decltype(hsa_signal_add_screlease)* hsa_signal_add_screlease_fn;
- decltype(hsa_signal_add_scacq_screl)* hsa_signal_add_scacq_screl_fn;
- decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
- decltype(hsa_signal_subtract_scacquire)* hsa_signal_subtract_scacquire_fn;
- decltype(hsa_signal_subtract_screlease)* hsa_signal_subtract_screlease_fn;
- decltype(hsa_signal_subtract_scacq_screl)* hsa_signal_subtract_scacq_screl_fn;
- decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
- decltype(hsa_signal_cas_scacquire)* hsa_signal_cas_scacquire_fn;
- decltype(hsa_signal_cas_screlease)* hsa_signal_cas_screlease_fn;
- decltype(hsa_signal_cas_scacq_screl)* hsa_signal_cas_scacq_screl_fn;
-
- //===--- Instruction Set Architecture -----------------------------------===//
-
- decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
- // Deprecated since v1.1.
- decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
- // Deprecated since v1.1.
- decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;
-
- //===--- Code Objects (deprecated) --------------------------------------===//
-
- // Deprecated since v1.1.
- decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
- // Deprecated since v1.1.
- decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
- // Deprecated since v1.1.
- decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
- // Deprecated since v1.1.
- decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
- // Deprecated since v1.1.
- decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
- // Deprecated since v1.1.
- decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
- // Deprecated since v1.1.
- decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;
-
- //===--- Executable -----------------------------------------------------===//
-
- // Deprecated since v1.1.
- decltype(hsa_executable_create)* hsa_executable_create_fn;
- decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
- // Deprecated since v1.1.
- decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
- decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
- decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
- decltype(hsa_executable_global_variable_define)*
- hsa_executable_global_variable_define_fn;
- decltype(hsa_executable_agent_global_variable_define)*
- hsa_executable_agent_global_variable_define_fn;
- decltype(hsa_executable_readonly_variable_define)*
- hsa_executable_readonly_variable_define_fn;
- decltype(hsa_executable_validate)* hsa_executable_validate_fn;
- // Deprecated since v1.1.
- decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
- decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
- // Deprecated since v1.1.
- decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;
-
- //===--- Runtime Notifications ------------------------------------------===//
-
- decltype(hsa_status_string)* hsa_status_string_fn;
-
- // Start HSA v1.1 additions
- decltype(hsa_extension_get_name)* hsa_extension_get_name_fn;
- decltype(hsa_system_major_extension_supported)* hsa_system_major_extension_supported_fn;
- decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table_fn;
- decltype(hsa_agent_major_extension_supported)* hsa_agent_major_extension_supported_fn;
- decltype(hsa_cache_get_info)* hsa_cache_get_info_fn;
- decltype(hsa_agent_iterate_caches)* hsa_agent_iterate_caches_fn;
- decltype(hsa_signal_silent_store_relaxed)* hsa_signal_silent_store_relaxed_fn;
- decltype(hsa_signal_silent_store_screlease)* hsa_signal_silent_store_screlease_fn;
- decltype(hsa_signal_group_create)* hsa_signal_group_create_fn;
- decltype(hsa_signal_group_destroy)* hsa_signal_group_destroy_fn;
- decltype(hsa_signal_group_wait_any_scacquire)* hsa_signal_group_wait_any_scacquire_fn;
- decltype(hsa_signal_group_wait_any_relaxed)* hsa_signal_group_wait_any_relaxed_fn;
-
- //===--- Instruction Set Architecture - HSA v1.1 additions --------------===//
-
- decltype(hsa_agent_iterate_isas)* hsa_agent_iterate_isas_fn;
- decltype(hsa_isa_get_info_alt)* hsa_isa_get_info_alt_fn;
- decltype(hsa_isa_get_exception_policies)* hsa_isa_get_exception_policies_fn;
- decltype(hsa_isa_get_round_method)* hsa_isa_get_round_method_fn;
- decltype(hsa_wavefront_get_info)* hsa_wavefront_get_info_fn;
- decltype(hsa_isa_iterate_wavefronts)* hsa_isa_iterate_wavefronts_fn;
-
- //===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//
-
- // Deprecated since v1.1.
- decltype(hsa_code_object_get_symbol_from_name)*
- hsa_code_object_get_symbol_from_name_fn;
-
- //===--- Executable - HSA v1.1 additions --------------------------------===//
-
- decltype(hsa_code_object_reader_create_from_file)*
- hsa_code_object_reader_create_from_file_fn;
- decltype(hsa_code_object_reader_create_from_memory)*
- hsa_code_object_reader_create_from_memory_fn;
- decltype(hsa_code_object_reader_destroy)* hsa_code_object_reader_destroy_fn;
- decltype(hsa_executable_create_alt)* hsa_executable_create_alt_fn;
- decltype(hsa_executable_load_program_code_object)*
- hsa_executable_load_program_code_object_fn;
- decltype(hsa_executable_load_agent_code_object)*
- hsa_executable_load_agent_code_object_fn;
- decltype(hsa_executable_validate_alt)* hsa_executable_validate_alt_fn;
- decltype(hsa_executable_get_symbol_by_name)*
- hsa_executable_get_symbol_by_name_fn;
- decltype(hsa_executable_iterate_agent_symbols)*
- hsa_executable_iterate_agent_symbols_fn;
- decltype(hsa_executable_iterate_program_symbols)*
- hsa_executable_iterate_program_symbols_fn;
-};
-
-// Table to export HSA Apis from Core Runtime, Amd Extensions
-// Finalizer and Images
-struct HsaApiTable {
-
- // Version of Hsa Api Table
- ApiTableVersion version;
-
- // Table of function pointers to HSA Core Runtime
- CoreApiTable* core_;
-
- // Table of function pointers to AMD extensions
- AmdExtTable* amd_ext_;
-
- // Table of function pointers to HSA Finalizer Extension
- FinalizerExtTable* finalizer_ext_;
-
- // Table of function pointers to HSA Image Extension
- ImageExtTable* image_ext_;
-};
-
-// Structure containing instances of different api tables
-struct HsaApiTableContainer {
- HsaApiTable root;
- CoreApiTable core;
- AmdExtTable amd_ext;
- FinalizerExtTable finalizer_ext;
- ImageExtTable image_ext;
-
- // Default initialization of a container instance
- HsaApiTableContainer() {
- root.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
- root.version.minor_id = sizeof(HsaApiTable);
- root.version.step_id = HSA_API_TABLE_STEP_VERSION;
-
- core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
- core.version.minor_id = sizeof(CoreApiTable);
- core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;
- root.core_ = &core;
-
- amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
- amd_ext.version.minor_id = sizeof(AmdExtTable);
- amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;
- root.amd_ext_ = &amd_ext;
-
- finalizer_ext.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
- finalizer_ext.version.minor_id = sizeof(FinalizerExtTable);
- finalizer_ext.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
- root.finalizer_ext_ = & finalizer_ext;
-
- image_ext.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
- image_ext.version.minor_id = sizeof(ImageExtTable);
- image_ext.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;
- root.image_ext_ = &image_ext;
- }
-};
-
-// Api to copy function pointers of a table
-static
-void inline copyApi(void* src, void* dest, size_t size) {
- assert(size >= sizeof(ApiTableVersion));
- memcpy((char*)src + sizeof(ApiTableVersion),
- (char*)dest + sizeof(ApiTableVersion),
- (size - sizeof(ApiTableVersion)));
-}
-
-// Copy Api child tables if valid.
-static void inline copyElement(ApiTableVersion* dest, ApiTableVersion* src) {
- if (src->major_id && (dest->major_id == src->major_id)) {
- dest->step_id = src->step_id;
- dest->minor_id = Min(dest->minor_id, src->minor_id);
- copyApi(dest, src, dest->minor_id);
- } else {
- dest->major_id = 0;
- dest->minor_id = 0;
- dest->step_id = 0;
- }
-}
-
-// Copy constructor for all Api tables. The function assumes the
-// user has initialized an instance of tables container correctly
-// for the Major, Minor and Stepping Ids of Root and Child Api tables.
-// The function will overwrite the value of Minor Id by taking the
-// minimum of source and destination parameters. It will also overwrite
-// the stepping Id with value from source parameter.
-static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
- // Verify Major Id of source and destination tables match
- if (dest->version.major_id != src->version.major_id) {
- dest->version.major_id = 0;
- dest->version.minor_id = 0;
- dest->version.step_id = 0;
- return;
- }
-
- // Initialize the stepping id and minor id of root table. For the
- // minor id which encodes struct size, take the minimum of source
- // and destination parameters
- dest->version.step_id = src->version.step_id;
- dest->version.minor_id = Min(dest->version.minor_id, src->version.minor_id);
-
- // Copy child tables if present
- if ((offsetof(HsaApiTable, core_) < dest->version.minor_id))
- copyElement(&dest->core_->version, &src->core_->version);
- if ((offsetof(HsaApiTable, amd_ext_) < dest->version.minor_id))
- copyElement(&dest->amd_ext_->version, &src->amd_ext_->version);
- if ((offsetof(HsaApiTable, finalizer_ext_) < dest->version.minor_id))
- copyElement(&dest->finalizer_ext_->version, &src->finalizer_ext_->version);
- if ((offsetof(HsaApiTable, image_ext_) < dest->version.minor_id))
- copyElement(&dest->image_ext_->version, &src->image_ext_->version);
-}
-#endif
diff --git a/third_party/rocm/include/hsa/hsa_ext_amd.h b/third_party/rocm/include/hsa/hsa_ext_amd.h
deleted file mode 100644
index 04a6e4d..0000000
--- a/third_party/rocm/include/hsa/hsa_ext_amd.h
+++ /dev/null
@@ -1,1983 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// HSA AMD extension.
-
-#ifndef HSA_RUNTIME_EXT_AMD_H_
-#define HSA_RUNTIME_EXT_AMD_H_
-
-#include "hsa.h"
-#include "hsa_ext_image.h"
-
-#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
-#define HSA_AMD_INTERFACE_VERSION_MINOR 0
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @brief Enumeration constants added to ::hsa_status_t.
- *
- * @remark Additions to hsa_status_t
- */
-enum {
- /**
- * The memory pool is invalid.
- */
- HSA_STATUS_ERROR_INVALID_MEMORY_POOL = 40,
-
- /**
- * Agent accessed memory beyond the maximum legal address.
- */
- HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION = 41,
-
- /**
- * Agent executed an invalid shader instruction.
- */
- HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION = 42,
-};
-
-/**
- * @brief Agent attributes.
- */
-typedef enum hsa_amd_agent_info_s {
- /**
- * Chip identifier. The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_CHIP_ID = 0xA000,
- /**
- * Size of a cacheline in bytes. The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001,
- /**
- * The number of compute unit available in the agent. The type of this
- * attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002,
- /**
- * The maximum clock frequency of the agent in MHz. The type of this
- * attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003,
- /**
- * Internal driver node identifier. The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_DRIVER_NODE_ID = 0xA004,
- /**
- * Max number of watch points on memory address ranges to generate exception
- * events when the watched addresses are accessed. The type of this
- * attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS = 0xA005,
- /**
- * Agent BDF_ID, named LocationID in thunk. The type of this attribute is
- * uint32_t.
- */
- HSA_AMD_AGENT_INFO_BDFID = 0xA006,
- /**
- * Memory Interface width, the return value type is uint32_t.
- * This attribute is deprecated.
- */
- HSA_AMD_AGENT_INFO_MEMORY_WIDTH = 0xA007,
- /**
- * Max Memory Clock, the return value type is uint32_t.
- */
- HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY = 0xA008,
- /**
- * Board name of Agent - populated from MarketingName of Kfd Node
- * The value is an Ascii string of 64 chars.
- */
- HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009,
- /**
- * Maximum number of waves possible in a Compute Unit.
- * The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
- /**
- * Number of SIMD's per compute unit CU
- * The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
- /**
- * Number of Shader Engines (SE) in Gpu
- * The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES = 0xA00C,
- /**
- * Number of Shader Arrays Per Shader Engines in Gpu
- * The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE = 0xA00D,
- /**
- * Address of the HDP flush registers. Use of these registers does not conform to the HSA memory
- * model and should be treated with caution.
- * The type of this attribute is hsa_amd_hdp_flush_t.
- */
- HSA_AMD_AGENT_INFO_HDP_FLUSH = 0xA00E,
- /**
- * PCIe domain for the agent. Pairs with HSA_AMD_AGENT_INFO_BDFID
- * to give the full physical location of the Agent.
- * The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_DOMAIN = 0xA00F,
- /**
- * Queries for support of cooperative queues. See ::HSA_QUEUE_TYPE_COOPERATIVE.
- * The type of this attribute is bool.
- */
- HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010,
- /**
- * Queries UUID of an agent. The value is an Ascii string with a maximum
- * of 21 chars including NUL. The string value consists of two parts: header
- * and body. The header identifies device type (GPU, CPU, DSP) while body
- * encodes UUID as a 16 digit hex string
- *
- * Agents that do not support UUID will return the string "GPU-XX" or
- * "CPU-XX" or "DSP-XX" depending upon their device type ::hsa_device_type_t
- */
- HSA_AMD_AGENT_INFO_UUID = 0xA011,
- /**
- * Queries for the ASIC revision of an agent. The value is an integer that
- * increments for each revision. This can be used by user-level software to
- * change how it operates, depending on the hardware version. This allows
- * selective workarounds for hardware errata.
- * The type of this attribute is uint32_t.
- */
- HSA_AMD_AGENT_INFO_ASIC_REVISION = 0xA012
-} hsa_amd_agent_info_t;
-
-typedef struct hsa_amd_hdp_flush_s {
- uint32_t* HDP_MEM_FLUSH_CNTL;
- uint32_t* HDP_REG_FLUSH_CNTL;
-} hsa_amd_hdp_flush_t;
-
-/**
- * @brief Region attributes.
- */
-typedef enum hsa_amd_region_info_s {
- /**
- * Determine if host can access the region. The type of this attribute
- * is bool.
- */
- HSA_AMD_REGION_INFO_HOST_ACCESSIBLE = 0xA000,
- /**
- * Base address of the region in flat address space.
- */
- HSA_AMD_REGION_INFO_BASE = 0xA001,
- /**
- * Memory Interface width, the return value type is uint32_t.
- * This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_WIDTH.
- */
- HSA_AMD_REGION_INFO_BUS_WIDTH = 0xA002,
- /**
- * Max Memory Clock, the return value type is uint32_t.
- * This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY.
- */
- HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY = 0xA003
-} hsa_amd_region_info_t;
-
-/**
- * @brief Coherency attributes of fine grain region.
- */
-typedef enum hsa_amd_coherency_type_s {
- /**
- * Coherent region.
- */
- HSA_AMD_COHERENCY_TYPE_COHERENT = 0,
- /**
- * Non coherent region.
- */
- HSA_AMD_COHERENCY_TYPE_NONCOHERENT = 1
-} hsa_amd_coherency_type_t;
-
-/**
- * @brief Get the coherency type of the fine grain region of an agent.
- *
- * @param[in] agent A valid agent.
- *
- * @param[out] type Pointer to a memory location where the HSA runtime will
- * store the coherency type of the fine grain region.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is NULL.
- */
-hsa_status_t HSA_API hsa_amd_coherency_get_type(hsa_agent_t agent,
- hsa_amd_coherency_type_t* type);
-
-/**
- * @brief Set the coherency type of the fine grain region of an agent.
- * Deprecated. This is supported on KV platforms. For backward compatibility
- * other platforms will spuriously succeed.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] type The coherency type to be set.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is invalid.
- */
-hsa_status_t HSA_API hsa_amd_coherency_set_type(hsa_agent_t agent,
- hsa_amd_coherency_type_t type);
-
-/**
- * @brief Structure containing profiling dispatch time information.
- *
- * Times are reported as ticks in the domain of the HSA system clock.
- * The HSA system clock tick and frequency is obtained via hsa_system_get_info.
- */
-typedef struct hsa_amd_profiling_dispatch_time_s {
- /**
- * Dispatch packet processing start time.
- */
- uint64_t start;
- /**
- * Dispatch packet completion time.
- */
- uint64_t end;
-} hsa_amd_profiling_dispatch_time_t;
-
-/**
- * @brief Structure containing profiling async copy time information.
- *
- * Times are reported as ticks in the domain of the HSA system clock.
- * The HSA system clock tick and frequency is obtained via hsa_system_get_info.
- */
-typedef struct hsa_amd_profiling_async_copy_time_s {
- /**
- * Async copy processing start time.
- */
- uint64_t start;
- /**
- * Async copy completion time.
- */
- uint64_t end;
-} hsa_amd_profiling_async_copy_time_t;
-
-/**
- * @brief Enable or disable profiling capability of a queue.
- *
- * @param[in] queue A valid queue.
- *
- * @param[in] enable 1 to enable profiling. 0 to disable profiling.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
- */
-hsa_status_t HSA_API
- hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable);
-
-/**
- * @brief Enable or disable asynchronous memory copy profiling.
- *
- * @details The runtime will provide the copy processing start timestamp and
- * completion timestamp of each call to hsa_amd_memory_async_copy if the
- * async copy profiling is enabled prior to the call to
- * hsa_amd_memory_async_copy. The completion signal object is used to
- * hold the last async copy start and end timestamp. The client can retrieve
- * these timestamps via call to hsa_amd_profiling_get_async_copy_time.
- *
- * @param[in] enable True to enable profiling. False to disable profiling.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed on allocating resources
- * needed to profile the asynchronous copy.
- */
-hsa_status_t HSA_API
- hsa_amd_profiling_async_copy_enable(bool enable);
-
-/**
- * @brief Retrieve packet processing time stamps.
- *
- * @param[in] agent The agent with which the signal was last used. For
- * instance, if the profiled dispatch packet is dispatched onto queue Q,
- * which was created on agent A, then this parameter must be A.
- *
- * @param[in] signal A signal used as the completion signal of the dispatch
- * packet to retrieve time stamps from. This dispatch packet must have been
- * issued to a queue with profiling enabled and have already completed. Also
- * the signal must not have yet been used in any other packet following the
- * completion of the profiled dispatch packet.
- *
- * @param[out] time Packet processing timestamps in the HSA system clock
- * domain.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL The signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p time is NULL.
- */
-hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time(
- hsa_agent_t agent, hsa_signal_t signal,
- hsa_amd_profiling_dispatch_time_t* time);
-
-/**
- * @brief Retrieve asynchronous copy timestamps.
- *
- * @details Async copy profiling is enabled via call to
- * hsa_amd_profiling_async_copy_enable.
- *
- * @param[in] signal A signal used as the completion signal of the call to
- * hsa_amd_memory_async_copy.
- *
- * @param[out] time Async copy processing timestamps in the HSA system clock
- * domain.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL The signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p time is NULL.
- */
-hsa_status_t HSA_API hsa_amd_profiling_get_async_copy_time(
- hsa_signal_t signal, hsa_amd_profiling_async_copy_time_t* time);
-
-/**
- * @brief Computes the frequency ratio and offset between the agent clock and
- * HSA system clock and converts the agent's tick to HSA system domain tick.
- *
- * @param[in] agent The agent used to retrieve the agent_tick. It is user's
- * responsibility to make sure the tick number is from this agent, otherwise,
- * the behavior is undefined.
- *
- * @param[in] agent_tick The tick count retrieved from the specified @p agent.
- *
- * @param[out] system_tick The translated HSA system domain clock counter tick.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p system_tick is NULL;
- */
-hsa_status_t HSA_API
- hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent,
- uint64_t agent_tick,
- uint64_t* system_tick);
-
-/**
- * @brief Signal attribute flags.
- */
-typedef enum {
- /**
- * Signal will only be consumed by AMD GPUs. Limits signal consumption to
- * AMD GPU agents only. Ignored if @p num_consumers is not zero (all agents).
- */
- HSA_AMD_SIGNAL_AMD_GPU_ONLY = 1,
- /**
- * Signal may be used for interprocess communication.
- * IPC signals can be read, written, and waited on from any process.
- * Profiling using an IPC enabled signal is only supported in a single process
- * at a time. Producing profiling data in one process and consuming it in
- * another process is undefined.
- */
- HSA_AMD_SIGNAL_IPC = 2,
-} hsa_amd_signal_attribute_t;
-
-/**
- * @brief Create a signal with specific attributes.
- *
- * @param[in] initial_value Initial value of the signal.
- *
- * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
- * any agent might wait on the signal.
- *
- * @param[in] consumers List of agents that might consume (wait on) the
- * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
- * HSA runtime might use the list to optimize the handling of the signal
- * object. If an agent not listed in @p consumers waits on the returned
- * signal, the behavior is undefined. The memory associated with @p consumers
- * can be reused or freed after the function returns.
- *
- * @param[in] attributes Requested signal attributes. Multiple signal attributes
- * may be requested by combining them with bitwise OR. Requesting no attributes
- * (@p attributes == 0) results in the same signal as would have been obtained
- * via hsa_signal_create.
- *
- * @param[out] signal Pointer to a memory location where the HSA runtime will
- * store the newly created signal handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
- * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
- * contains duplicates.
- */
-hsa_status_t HSA_API hsa_amd_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
- const hsa_agent_t* consumers, uint64_t attributes,
- hsa_signal_t* signal);
-
-/**
- * @brief Asyncronous signal handler function type.
- *
- * @details Type definition of callback function to be used with
- * hsa_amd_signal_async_handler. This callback is invoked if the associated
- * signal and condition are met. The callback receives the value of the signal
- * which satisfied the associated wait condition and a user provided value. If
- * the callback returns true then the callback will be called again if the
- * associated signal and condition are satisfied again. If the callback returns
- * false then it will not be called again.
- *
- * @param[in] value Contains the value of the signal observed by
- * hsa_amd_signal_async_handler which caused the signal handler to be invoked.
- *
- * @param[in] arg Contains the user provided value given when the signal handler
- * was registered with hsa_amd_signal_async_handler
- *
- * @retval true resumes monitoring the signal with this handler (as if calling
- * hsa_amd_signal_async_handler again with identical parameters)
- *
- * @retval false stops monitoring the signal with this handler (handler will
- * not be called again for this signal)
- *
- */
-typedef bool (*hsa_amd_signal_handler)(hsa_signal_value_t value, void* arg);
-
-/**
- * @brief Register asynchronous signal handler function.
- *
- * @details Allows registering a callback function and user provided value with
- * a signal and wait condition. The callback will be invoked if the associated
- * signal and wait condition are satisfied. Callbacks will be invoked serially
- * but in an arbitrary order so callbacks should be independent of each other.
- * After being invoked a callback may continue to wait for its associated signal
- * and condition and, possibly, be invoked again. Or the callback may stop
- * waiting. If the callback returns true then it will continue waiting and may
- * be called again. If false then the callback will not wait again and will not
- * be called again for the associated signal and condition. It is possible to
- * register the same callback multiple times with the same or different signals
- * and/or conditions. Each registration of the callback will be treated entirely
- * independently.
- *
- * @param[in] signal hsa signal to be asynchronously monitored
- *
- * @param[in] cond condition value to monitor for
- *
- * @param[in] value signal value used in condition expression
- *
- * @param[in] handler asynchronous signal handler invoked when signal's
- * condition is met
- *
- * @param[in] arg user provided value which is provided to handler when handler
- * is invoked
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL signal is not a valid hsa_signal_t
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT handler is invalid (NULL)
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of
- * resources or blocking signals are not supported by the HSA driver component.
- *
- */
-hsa_status_t HSA_API
- hsa_amd_signal_async_handler(hsa_signal_t signal,
- hsa_signal_condition_t cond,
- hsa_signal_value_t value,
- hsa_amd_signal_handler handler, void* arg);
-
-/**
- * @brief Call a function asynchronously
- *
- * @details Provides access to the runtime's asynchronous event handling thread
- * for general asynchronous functions. Functions queued this way are executed
- * in the same manner as if they were a signal handler who's signal is
- * satisfied.
- *
- * @param[in] callback asynchronous function to be invoked
- *
- * @param[in] arg user provided value which is provided to handler when handler
- * is invoked
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT handler is invalid (NULL)
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of
- * resources or blocking signals are not supported by the HSA driver component.
- *
- */
-hsa_status_t HSA_API
- hsa_amd_async_function(void (*callback)(void* arg), void* arg);
-
-/**
- * @brief Wait for any signal-condition pair to be satisfied.
- *
- * @details Allows waiting for any of several signal and conditions pairs to be
- * satisfied. The function returns the index into the list of signals of the
- * first satisfying signal-condition pair. The value of the satisfying signal's
- * value is returned in satisfying_value unless satisfying_value is NULL. This
- * function provides only relaxed memory semantics.
- */
-uint32_t HSA_API
- hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals,
- hsa_signal_condition_t* conds,
- hsa_signal_value_t* values, uint64_t timeout_hint,
- hsa_wait_state_t wait_hint,
- hsa_signal_value_t* satisfying_value);
-
-/**
- * @brief Query image limits.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] attribute HSA image info attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p value is NULL or @p attribute <
- * HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS or @p attribute >
- * HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS.
- *
- */
-hsa_status_t HSA_API hsa_amd_image_get_info_max_dim(hsa_agent_t agent,
- hsa_agent_info_t attribute,
- void* value);
-
-/**
- * @brief Set a CU affinity to specific queues within the process, this function
- * call is "atomic".
- *
- * @param[in] queue A pointer to HSA queue.
- *
- * @param[in] num_cu_mask_count Size of CUMask bit array passed in.
- *
- * @param[in] cu_mask Bit-vector representing the CU mask.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p queue is NULL or invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_cu_mask_count is not a
- * multiple of 32 or @p cu_mask is NULL.
- *
- * @retval ::HSA_STATUS_ERROR Failed to call the thunk API.
- *
- */
-hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue,
- uint32_t num_cu_mask_count,
- const uint32_t* cu_mask);
-
-/**
- * @brief Memory segments associated with a memory pool.
- */
-typedef enum {
- /**
- * Global segment. Used to hold data that is shared by all agents.
- */
- HSA_AMD_SEGMENT_GLOBAL = 0,
- /**
- * Read-only segment. Used to hold data that remains constant during the
- * execution of a kernel.
- */
- HSA_AMD_SEGMENT_READONLY = 1,
- /**
- * Private segment. Used to hold data that is local to a single work-item.
- */
- HSA_AMD_SEGMENT_PRIVATE = 2,
- /**
- * Group segment. Used to hold data that is shared by the work-items of a
- * work-group.
- */
- HSA_AMD_SEGMENT_GROUP = 3,
-} hsa_amd_segment_t;
-
-/**
- * @brief A memory pool encapsulates physical storage on an agent
- * along with a memory access model.
- *
- * @details A memory pool encapsulates a physical partition of an agent's
- * memory system along with a memory access model. Division of a single
- * memory system into separate pools allows querying each partition's access
- * path properties (see ::hsa_amd_agent_memory_pool_get_info). Allocations
- * from a pool are preferentially bound to that pool's physical partition.
- * Binding to the pool's preferential physical partition may not be
- * possible or persistent depending on the system's memory policy
- * and/or state which is beyond the scope of HSA APIs.
- *
- * For example, a multi-node NUMA memory system may be represented by multiple
- * pools with each pool providing size and access path information for the
- * partition it represents. Allocations from a pool are preferentially bound
- * to the pool's partition (which in this example is a NUMA node) while
- * following its memory access model. The actual placement may vary or migrate
- * due to the system's NUMA policy and state, which is beyond the scope of
- * HSA APIs.
- */
-typedef struct hsa_amd_memory_pool_s {
- /**
- * Opaque handle.
- */
- uint64_t handle;
-} hsa_amd_memory_pool_t;
-
-/**
- * @brief Flags of a memory pool in the global segment. The values are combined
- * into the bit-field reported by ::HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS.
- */
-typedef enum hsa_amd_memory_pool_global_flag_s {
- /**
- * The application can use allocations in the memory pool to store kernel
- * arguments, and provide the values for the kernarg segment of
- * a kernel dispatch.
- */
- HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT = 1,
- /**
- * Updates to memory in this pool conform to HSA memory consistency model.
- * If this flag is set, then ::HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED
- * must not be set.
- */
- HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED = 2,
- /**
- * Writes to memory in this pool can be performed by a single agent at a time.
- */
- HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED = 4
-} hsa_amd_memory_pool_global_flag_t;
-
-/**
- * @brief Memory pool features.
- */
-typedef enum {
- /**
- * Segment where the memory pool resides. The type of this attribute is
- * ::hsa_amd_segment_t.
- */
- HSA_AMD_MEMORY_POOL_INFO_SEGMENT = 0,
- /**
- * Flag mask. The value of this attribute is undefined if the value of
- * ::HSA_AMD_MEMORY_POOL_INFO_SEGMENT is not ::HSA_AMD_SEGMENT_GLOBAL. The type
- * of this attribute is uint32_t, a bit-field of
- * ::hsa_amd_memory_pool_global_flag_t values.
- */
- HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS = 1,
- /**
- * Size of this pool, in bytes. The type of this attribute is size_t.
- */
- HSA_AMD_MEMORY_POOL_INFO_SIZE = 2,
- /**
- * Indicates whether memory in this pool can be allocated using
- * ::hsa_amd_memory_pool_allocate. The type of this attribute is bool.
- *
- * The value of this flag is always false for memory pools in the group and
- * private segments.
- */
- HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED = 5,
- /**
- * Allocation granularity of buffers allocated by
- * ::hsa_amd_memory_pool_allocate in this memory pool. The size of a buffer
- * allocated in this pool is a multiple of the value of this attribute. The
- * value of this attribute is only defined if
- * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for this pool. The
- * type of this attribute is size_t.
- */
- HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
- /**
- * Alignment of buffers allocated by ::hsa_amd_memory_pool_allocate in this
- * pool. The value of this attribute is only defined if
- * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for this pool, and
- * must be a power of 2. The type of this attribute is size_t.
- */
- HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
- /**
- * This memory_pool can be made directly accessible by all the agents in the
- * system (::hsa_amd_agent_memory_pool_get_info does not return
- * ::HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED for any agent). The type of this
- * attribute is bool.
- */
- HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
- /**
- * Maximum aggregate allocation size in bytes. The type of this attribute
- * is size_t.
- */
- HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
-} hsa_amd_memory_pool_info_t;
-
-/**
- * @brief Get the current value of an attribute of a memory pool.
- *
- * @param[in] memory_pool A valid memory pool.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- */
-hsa_status_t HSA_API
- hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool,
- hsa_amd_memory_pool_info_t attribute,
- void* value);
-
-/**
- * @brief Iterate over the memory pools associated with a given agent, and
- * invoke an application-defined callback on every iteration.
- *
- * @details An agent can directly access buffers located in some memory pool, or
- * be enabled to access them by the application (see ::hsa_amd_agents_allow_access),
- * yet that memory pool may not be returned by this function for that given
- * agent.
- *
- * A memory pool of fine-grained type must be associated only with the host.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked on the same thread that called
- * ::hsa_amd_agent_iterate_memory_pools, serially, once per memory pool that is
- * associated with the agent. The HSA runtime passes two arguments to the
- * callback: the memory pool, and the application data. If @p callback
- * returns a status other than ::HSA_STATUS_SUCCESS for a particular iteration,
- * the traversal stops and ::hsa_amd_agent_iterate_memory_pools returns that status
- * value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
- hsa_agent_t agent,
- hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data),
- void* data);
-
-/**
- * @brief Allocate a block of memory (or buffer) in the specified pool.
- *
- * @param[in] memory_pool Memory pool where to allocate memory from. The memory
- * pool must have the ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED flag set.
- *
- * @param[in] size Allocation size, in bytes. Must not be zero. This value is
- * rounded up to the nearest multiple of
- * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE in @p memory_pool.
- *
- * @param[in] flags A bit-field that is used to specify allocation
- * directives. Reserved parameter, must be 0.
- *
- * @param[out] ptr Pointer to the location where to store the base virtual
- * address of the allocated block. The returned base address is aligned to
- * the value of ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT in
- * @p memory_pool. If the allocation fails, the returned value is
- * undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES No memory is available.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The memory pool is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
- * allocate memory in @p memory_pool, or @p size is greater than
- * the value of HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0,
- * or flags is not 0.
- *
- */
-hsa_status_t HSA_API
- hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size,
- uint32_t flags, void** ptr);
-
-/**
- * @brief Deallocate a block of memory previously allocated using
- * ::hsa_amd_memory_pool_allocate.
- *
- * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
- * previously returned by ::hsa_amd_memory_pool_allocate, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- */
-hsa_status_t HSA_API hsa_amd_memory_pool_free(void* ptr);
-
-/**
- * @brief Asynchronously copy a block of memory from the location pointed to by
- * @p src on the @p src_agent to the memory block pointed to by @p dst on the @p
- * dst_agent.
- * Because the DMA engines used may not be in the same coherency domain, the caller must ensure
- * that buffers are system-level coherent. In general this requires the sending device to have
- * released the buffer to system scope prior to executing the copy API and the receiving device
- * must execute a system scope acquire fence prior to use of the destination buffer.
- *
- * @param[out] dst Buffer where the content is to be copied.
- *
- * @param[in] dst_agent Agent associated with the @p dst. The agent must be able to directly
- * access both the source and destination buffers in their current locations.
- *
- * @param[in] src A valid pointer to the source of data to be copied. The source
- * buffer must not overlap with the destination buffer, otherwise the copy will succeed
- * but the contents of @p dst are undefined.
- *
- * @param[in] src_agent Agent associated with the @p src. The agent must be able to directly
- * access both the source and destination buffers in their current locations.
- *
- * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
- * performed and the function returns success. Copying a number of bytes larger
- * than the size of the buffers pointed by @p dst or @p src results in undefined
- * behavior.
- *
- * @param[in] num_dep_signals Number of dependent signals. Can be 0.
- *
- * @param[in] dep_signals List of signals that must be waited on before the copy
- * operation starts. The copy will start after every signal has been observed with
- * the value 0. The dependent signals should not include the completion signal of an
- * hsa_amd_memory_async_copy operation to be issued in the future, as that can result in a
- * deadlock. If @p num_dep_signals is 0, this argument is ignored.
- *
- * @param[in] completion_signal Signal used to indicate completion of the copy
- * operation. When the copy operation is finished, the value of the signal is
- * decremented. The runtime indicates that an error has occurred during the copy
- * operation by setting the value of the completion signal to a negative
- * number. The signal handle must not be 0.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. The
- * application is responsible for checking for asynchronous error conditions
- * (see the description of @p completion_signal).
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p completion_signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
- * pointers are NULL, or the completion signal is 0.
- */
-hsa_status_t HSA_API
- hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src,
- hsa_agent_t src_agent, size_t size,
- uint32_t num_dep_signals,
- const hsa_signal_t* dep_signals,
- hsa_signal_t completion_signal);
-
-/*
-[Provisional API]
-Pitched memory descriptor.
-All elements must be 4 byte aligned. Pitch and slice are in bytes.
-*/
-typedef struct hsa_pitched_ptr_s {
- void* base;
- size_t pitch;
- size_t slice;
-} hsa_pitched_ptr_t;
-
-/*
-[Provisional API]
-Copy direction flag.
-*/
-typedef enum {
- hsaHostToHost = 0,
- hsaHostToDevice = 1,
- hsaDeviceToHost = 2,
- hsaDeviceToDevice = 3
-} hsa_amd_copy_direction_t;
-
-/*
-[Provisional API]
-SDMA 3D memory copy API. The same requirements must be met by src and dst as in
-hsa_amd_memory_async_copy.
-Both src and dst must be directly accessible to the copy_agent during the copy, src and dst rects
-must not overlap.
-CPU agents are not supported. API requires SDMA and will return an error if SDMA is not available.
-Offsets and range carry x in bytes, y and z in rows and layers.
-*/
-hsa_status_t HSA_API hsa_amd_memory_async_copy_rect(
- const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
- const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
- hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
- hsa_signal_t completion_signal);
-
-/**
- * @brief Type of accesses to a memory pool from a given agent.
- */
-typedef enum {
- /**
- * The agent cannot directly access any buffer in the memory pool.
- */
- HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED = 0,
- /**
- * The agent can directly access a buffer located in the pool; the application
- * does not need to invoke ::hsa_amd_agents_allow_access.
- */
- HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT = 1,
- /**
- * The agent can directly access a buffer located in the pool, but only if the
- * application has previously requested access to that buffer using
- * ::hsa_amd_agents_allow_access.
- */
- HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT = 2
-} hsa_amd_memory_pool_access_t;
-
-/**
- * @brief Type of the bus or link used when accessing a memory pool from an
- * agent (see the link_type member of ::hsa_amd_memory_pool_link_info_t).
- */
-typedef enum {
- /**
- * HyperTransport bus type.
- */
- HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
-
- /**
- * QPI bus type.
- */
- HSA_AMD_LINK_INFO_TYPE_QPI = 1,
-
- /**
- * PCIe bus type.
- */
- HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
-
- /**
- * InfiniBand bus type.
- */
- HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
-
- /**
- * xGMI link type.
- */
- HSA_AMD_LINK_INFO_TYPE_XGMI = 4
-
-} hsa_amd_link_info_type_t;
-
-/**
- * @brief Link properties when accessing the memory pool from the specified
- * agent.
- */
-typedef struct hsa_amd_memory_pool_link_info_s {
- /**
- * Minimum transfer latency (rounded to ns).
- */
- uint32_t min_latency;
-
- /**
- * Maximum transfer latency (rounded to ns).
- */
- uint32_t max_latency;
-
- /**
- * Minimum link interface bandwidth in MB/s.
- */
- uint32_t min_bandwidth;
-
- /**
- * Maximum link interface bandwidth in MB/s.
- */
- uint32_t max_bandwidth;
-
- /**
- * Support for 32-bit atomic transactions.
- */
- bool atomic_support_32bit;
-
- /**
- * Support for 64-bit atomic transactions.
- */
- bool atomic_support_64bit;
-
- /**
- * Support for cache coherent transactions.
- */
- bool coherent_support;
-
- /**
- * The type of bus/link.
- */
- hsa_amd_link_info_type_t link_type;
-
- /**
- * NUMA distance of memory pool relative to querying agent
- */
- uint32_t numa_distance;
-} hsa_amd_memory_pool_link_info_t;
-
-/**
- * @brief Properties of the relationship between an agent and a memory pool.
- */
-typedef enum {
- /**
- * Access to buffers located in the memory pool. The type of this attribute
- * is ::hsa_amd_memory_pool_access_t.
- *
- * An agent can always directly access buffers currently located in a memory
- * pool that is associated (the memory_pool is one of the values returned by
- * ::hsa_amd_agent_iterate_memory_pools on the agent) with that agent. If the
- * buffer is currently located in a memory pool that is not associated with
- * the agent, and the value returned by this function for the given
- * combination of agent and memory pool is not
- * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED, the application still needs to invoke
- * ::hsa_amd_agents_allow_access in order to gain direct access to the buffer.
- *
- * If the given agent can directly access buffers in the pool, the result is not
- * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is associated with
- * the agent, or it is of fine-grained type, the result must not be
- * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is not associated
- * with the agent, and does not reside in the global segment, the result must
- * be HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED.
- */
- HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS = 0,
-
- /**
- * Number of links to hop when accessing the memory pool from the specified
- * agent. The value of this attribute is zero if the memory pool is associated
- * with the agent, or if the access type is
- * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. The type of this attribute is
- * uint32_t.
- */
- HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS = 1,
-
- /**
- * Details of each link hop when accessing the memory pool starting from the
- * specified agent. The type of this attribute is an array of size
- * HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS with each element being a
- * ::hsa_amd_memory_pool_link_info_t.
- */
- HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO = 2
-
-} hsa_amd_agent_memory_pool_info_t;
-
-/**
- * @brief Get the current value of an attribute of the relationship between an
- * agent and a memory pool.
- *
- * @param[in] agent Agent.
- *
- * @param[in] memory_pool Memory pool.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- */
-hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info(
- hsa_agent_t agent, hsa_amd_memory_pool_t memory_pool,
- hsa_amd_agent_memory_pool_info_t attribute, void* value);
-
-/**
- * @brief Enable direct access to a buffer from a given set of agents.
- *
- * @details
- *
- * Upon return, only the listed agents and the agent associated with the
- * buffer's memory pool have direct access to the @p ptr.
- *
- * Any agent that has access to the buffer before and after the call to
- * ::hsa_amd_agents_allow_access will also have access while
- * ::hsa_amd_agents_allow_access is in progress.
- *
- * The caller is responsible for ensuring that each agent in the list
- * is able to access the memory pool containing @p ptr
- * (using ::hsa_amd_agent_memory_pool_get_info with the ::HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS attribute),
- * otherwise an error code is returned.
- *
- * @param[in] num_agents Size of @p agents.
- *
- * @param[in] agents List of agents. If @p num_agents is 0, this argument is
- * ignored.
- *
- * @param[in] flags A list of bit-fields that is used to specify access
- * information on a per-agent basis. This is currently reserved and must be NULL.
- *
- * @param[in] ptr A buffer previously allocated using ::hsa_amd_memory_pool_allocate.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_agents is 0, or @p agents
- * is NULL, or @p flags is not NULL, or access is requested for agent(s) that
- * cannot access the pool from which @p ptr is allocated.
- *
- */
-hsa_status_t HSA_API
- hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents,
- const uint32_t* flags, const void* ptr);
-
-/**
- * @brief Query if buffers currently located in some memory pool can be
- * relocated to a destination memory pool.
- *
- * @details If the returned value is non-zero, a migration of a buffer to @p
- * dst_memory_pool using ::hsa_amd_memory_migrate may nevertheless fail due to
- * resource limitations.
- *
- * @param[in] src_memory_pool Source memory pool.
- *
- * @param[in] dst_memory_pool Destination memory pool.
- *
- * @param[out] result Pointer to a memory location where the result of the query
- * is stored. Must not be NULL. If buffers currently located in @p
- * src_memory_pool can be relocated to @p dst_memory_pool, the result is
- * true.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL One of the memory pools is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API
- hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool,
- hsa_amd_memory_pool_t dst_memory_pool,
- bool* result);
-
-/**
- * @brief Relocate a buffer to a new memory pool.
- *
- * @details When a buffer is migrated, its virtual address remains the same but
- * its physical contents are moved to the indicated memory pool.
- *
- * After migration, only the agent associated with the destination pool will have access.
- *
- * The caller is also responsible for ensuring that the allocation in the
- * source memory pool where the buffer is currently located can be migrated to the
- * specified destination memory pool (i.e. ::hsa_amd_memory_pool_can_migrate returns a value of
- * true for the source and destination memory pools), otherwise the behavior is undefined.
- *
- * The caller must ensure that the buffer is not accessed while it is migrated.
- *
- * @param[in] ptr Buffer to be relocated. The buffer must have been released to system
- * prior to calling this API. The buffer will be released to system upon completion.
- *
- * @param[in] memory_pool Memory pool where to place the buffer.
- *
- * @param[in] flags A bit-field that is used to specify migration
- * information. Must be zero.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The destination memory pool is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
- * allocating the necessary resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p flags is not 0.
- */
-hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr,
- hsa_amd_memory_pool_t memory_pool,
- uint32_t flags);
-
-/**
- *
- * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
- * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
- * locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted). In
- * this case, the same input @p host_ptr may give different locked @p agent_ptr and when it does,
- * they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
- * Accesses to @p agent_ptr are coarse grained.
- *
- * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
- *
- * @param[in] size The size to be locked.
- *
- * @param[in] agents Array of agent handles to gain access to the @p host_ptr.
- * If this parameter is NULL and the @p num_agent is 0, all agents
- * in the platform will gain access to the @p host_ptr.
- *
- * @param[in] num_agent Number of agent handles in @p agents.
- *
- * @param[out] agent_ptr Pointer to the location where to store the new address.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
- * allocating the necessary resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agent in @p agents is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or
- * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
- * is NULL but @p num_agent is not 0.
- */
-hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size,
- hsa_agent_t* agents, int num_agent,
- void** agent_ptr);
-
-/**
- *
- * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
- * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
- * locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted).
- * In this case, the same input @p host_ptr may give different locked @p agent_ptr and when it
- * does, they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
- * Accesses to the memory via @p agent_ptr have the same access properties as memory allocated from
- * @p pool as determined by ::hsa_amd_memory_pool_get_info and ::hsa_amd_agent_memory_pool_get_info
- * (ex. coarse/fine grain, platform atomic support, link info). Physical composition and placement
- * of the memory (ex. page size, NUMA binding) is not changed.
- *
- * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
- *
- * @param[in] size The size to be locked.
- *
- * @param[in] agents Array of agent handles to gain access to the @p host_ptr.
- * If this parameter is NULL and the @p num_agent is 0, all agents
- * in the platform will gain access to the @p host_ptr.
- *
- * @param[in] num_agent Number of agent handles in @p agents.
- *
- * @param[in] pool Global memory pool owned by a CPU agent.
- *
- * @param[in] flags A bit-field that is used to specify allocation
- * directives. Reserved parameter, must be 0.
- *
- * @param[out] agent_ptr Pointer to the location where to store the new address.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
- * allocating the necessary resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agent in @p agents is
- * invalid or can not access @p pool.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL @p pool is invalid or not owned
- * by a CPU agent.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or
- * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
- * is NULL but @p num_agent is not 0 or flags is not 0.
- */
-hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
- int num_agent, hsa_amd_memory_pool_t pool,
- uint32_t flags, void** agent_ptr);
-
-/**
- *
- * @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock or
- * ::hsa_amd_memory_lock_to_pool.
- *
- * @details The behavior is undefined if the host pointer being unpinned does not
- * match a previously pinned address or if the host pointer was already deallocated.
- *
- * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator that was
- * pinned previously via ::hsa_amd_memory_lock or ::hsa_amd_memory_lock_to_pool.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- */
-hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr);
-
-/**
- * @brief Sets the first @p count of uint32_t of the block of memory pointed by
- * @p ptr to the specified @p value.
- *
- * @param[in] ptr Pointer to the block of memory to fill.
- *
- * @param[in] value Value to be set.
- *
- * @param[in] count Number of uint32_t elements to be set to the value.
- *
- * @retval HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL or
- * not 4-byte aligned.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ALLOCATION if the given memory
- * region was not allocated with HSA runtime APIs.
- *
- */
-hsa_status_t HSA_API
- hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count);
-
-/**
- * @brief Maps an interop object into the HSA flat address space and establishes
- * memory residency. The metadata pointer is valid during the lifetime of the
- * map (until hsa_amd_interop_unmap_buffer is called).
- * Multiple calls to hsa_amd_interop_map_buffer with the same interop_handle
- * result in multiple mappings with potentially different addresses and
- * different metadata pointers. Concurrent operations on these addresses are
- * not coherent. Memory must be fenced to system scope to ensure consistency,
- * between mappings and with any views of this buffer in the originating
- * software stack.
- *
- * @param[in] num_agents Number of agents which require access to the memory
- *
- * @param[in] agents List of accessing agents.
- *
- * @param[in] interop_handle Handle of interop buffer (dmabuf handle in Linux)
- *
- * @param[in] flags Reserved, must be 0
- *
- * @param[out] size Size in bytes of the mapped object
- *
- * @param[out] ptr Base address of the mapped object
- *
- * @param[out] metadata_size Size of metadata in bytes, may be NULL
- *
- * @param[out] metadata Pointer to metadata, may be NULL
- *
- * @retval HSA_STATUS_SUCCESS if successfully mapped
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT all other errors
- */
-hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents,
- hsa_agent_t* agents,
- int interop_handle,
- uint32_t flags,
- size_t* size,
- void** ptr,
- size_t* metadata_size,
- const void** metadata);
-
-/**
- * @brief Removes a previously mapped interop object from HSA's flat address space.
- * Ends lifetime for the mapping's associated metadata pointer.
- */
-hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr);
-
-/**
- * @brief Encodes an opaque vendor specific image format. The length of data
- * depends on the underlying format. This structure must not be copied as its
- * true length can not be determined.
- */
-typedef struct hsa_amd_image_descriptor_s {
- /*
- Version number of the descriptor
- */
- uint32_t version;
-
- /*
- Vendor and device PCI IDs for the format as VENDOR_ID<<16|DEVICE_ID.
- */
- uint32_t deviceID;
-
- /*
- Start of vendor specific data.
- */
- uint32_t data[1];
-} hsa_amd_image_descriptor_t;
-
-/**
- * @brief Creates an image from an opaque vendor specific image format.
- * Does not modify data at image_data. Intended initially for
- * accessing interop images.
- *
- * @param[in] agent Agent on which to create the image
- *
- * @param[in] image_descriptor Image descriptor of the image to create
- *
- * @param[in] image_layout Vendor specific image format
- *
- * @param[in] image_data Pointer to image backing store
- *
- * @param[in] access_permission Access permissions for the image object
- *
- * @param[out] image Created image object.
- *
- * @retval HSA_STATUS_SUCCESS Image created successfully
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT Bad or mismatched descriptor,
- * null image_data, or mismatched access_permission.
- */
-hsa_status_t HSA_API hsa_amd_image_create(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- const hsa_amd_image_descriptor_t *image_layout,
- const void *image_data,
- hsa_access_permission_t access_permission,
- hsa_ext_image_t *image
-);
-
-/**
- * @brief Denotes the type of memory in a pointer info query.
- */
-typedef enum {
- /*
- Memory is not known to the HSA driver. Unallocated or unlocked system memory.
- */
- HSA_EXT_POINTER_TYPE_UNKNOWN = 0,
- /*
- Memory was allocated with an HSA memory allocator.
- */
- HSA_EXT_POINTER_TYPE_HSA = 1,
- /*
- System memory which has been locked for use with an HSA agent.
-
- Memory of this type is normal malloc'd memory and is always accessible to
- the CPU. Pointer info queries may not include CPU agents in the accessible
- agents list as the CPU has implicit access.
- */
- HSA_EXT_POINTER_TYPE_LOCKED = 2,
- /*
- Memory originated in a graphics component and is shared with ROCr.
- */
- HSA_EXT_POINTER_TYPE_GRAPHICS = 3,
- /*
- Memory has been shared with the local process via ROCr IPC APIs.
- */
- HSA_EXT_POINTER_TYPE_IPC = 4
-} hsa_amd_pointer_type_t;
-
-/**
- * @brief Describes a memory allocation known to ROCr.
- * Within a ROCr major version this structure can only grow.
- */
-typedef struct hsa_amd_pointer_info_s {
- /*
- Size in bytes of this structure. Used for version control within a major ROCr
- revision. Set to sizeof(hsa_amd_pointer_t) prior to calling
- hsa_amd_pointer_info. If the runtime supports an older version of pointer
- info then size will be smaller on return. Members starting after the return
- value of size will not be updated by hsa_amd_pointer_info.
- */
- uint32_t size;
- /*
- The type of allocation referenced.
- */
- hsa_amd_pointer_type_t type;
- /*
- Base address at which non-host agents may access the allocation.
- */
- void* agentBaseAddress;
- /*
- Base address at which the host agent may access the allocation.
- */
- void* hostBaseAddress;
- /*
- Size of the allocation
- */
- size_t sizeInBytes;
- /*
- Application provided value.
- */
- void* userData;
- /*
- Reports an agent which "owns" (ie has preferred access to) the pool in which the allocation was
- made. When multiple agents share equal access to a pool (ex: multiple CPU agents, or multi-die
- GPU boards) any such agent may be returned.
- */
- hsa_agent_t agentOwner;
-} hsa_amd_pointer_info_t;
-
-/**
- * @brief Retrieves information about the allocation referenced by the given
- * pointer. Optionally returns the number and list of agents which can
- * directly access the allocation.
- *
- * @param[in] ptr Pointer which references the allocation to retrieve info for.
- *
- * @param[in, out] info Pointer to structure to be filled with allocation info.
- * Data member size must be set to the size of the structure prior to calling
- * hsa_amd_pointer_info. On return size will be set to the size of the
- * pointer info structure supported by the runtime, if smaller. Members
- * beyond the returned value of size will not be updated by the API.
- * Must not be NULL.
- *
- * @param[in] alloc Function pointer to an allocator used to allocate the
- * @p accessible array. If NULL @p accessible will not be returned.
- *
- * @param[out] num_agents_accessible Recieves the count of agents in
- * @p accessible. If NULL @p accessible will not be returned.
- *
- * @param[out] accessible Recieves a pointer to the array, allocated by @p alloc,
- * holding the list of agents which may directly access the allocation.
- * May be NULL.
- *
- * @retval HSA_STATUS_SUCCESS Info retrieved successfully
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT NULL in @p ptr or @p info.
- */
-hsa_status_t HSA_API hsa_amd_pointer_info(void* ptr,
- hsa_amd_pointer_info_t* info,
- void* (*alloc)(size_t),
- uint32_t* num_agents_accessible,
- hsa_agent_t** accessible);
-
-/**
- * @brief Associates an arbitrary pointer with an allocation known to ROCr.
- * The pointer can be fetched by hsa_amd_pointer_info in the userData field.
- *
- * @param[in] ptr Pointer to the first byte of an allocation known to ROCr
- * with which to associate @p userdata.
- *
- * @param[in] userdata Abitrary pointer to associate with the allocation.
- *
- * @retval HSA_STATUS_SUCCESS @p userdata successfully stored.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is not known to ROCr.
- */
-hsa_status_t HSA_API hsa_amd_pointer_info_set_userdata(void* ptr,
- void* userdata);
-
-/**
- * @brief 256-bit process independent identifier for a ROCr shared memory
- * allocation.
- */
-typedef struct hsa_amd_ipc_memory_s {
- uint32_t handle[8];
-} hsa_amd_ipc_memory_t;
-
-/**
- * @brief Prepares an allocation for interprocess sharing and creates a
- * handle of type hsa_amd_ipc_memory_t uniquely identifying the allocation. A
- * handle is valid while the allocation it references remains accessible in
- * any process. In general applications should confirm that a shared memory
- * region has been attached (via hsa_amd_ipc_memory_attach) in the remote
- * process prior to releasing that memory in the local process.
- * Repeated calls for the same allocation may, but are not required to, return
- * unique handles.
- *
- * @param[in] ptr Pointer to memory allocated via ROCr APIs to prepare for
- * sharing.
- *
- * @param[in] len Length in bytes of the allocation to share.
- *
- * @param[out] handle Process independent identifier referencing the shared
- * allocation.
- *
- * @retval HSA_STATUS_SUCCESS allocation is prepared for interprocess sharing.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr does not point to the
- * first byte of an allocation made through ROCr, or len is not the full length
- * of the allocation or handle is NULL.
- */
-hsa_status_t HSA_API hsa_amd_ipc_memory_create(void* ptr, size_t len,
- hsa_amd_ipc_memory_t* handle);
-
-/**
- * @brief Imports shared memory into the local process and makes it accessible
- * by the given agents. If a shared memory handle is attached multiple times
- * in a process each attach may return a different address. Each returned
- * address is refcounted and requires a matching number of calls to
- * hsa_amd_ipc_memory_detach to release the shared memory mapping.
- *
- * @param[in] handle Pointer to the identifier for the shared memory.
- *
- * @param[in] len Length of the shared memory to import.
- * Reserved. Must be the full length of the shared allocation in this version.
- *
- * @param[in] num_agents Count of agents in @p mapping_agents.
- * May be zero if all agents are to be allowed access.
- *
- * @param[in] mapping_agents List of agents to access the shared memory.
- * Ignored if @p num_agents is zero.
- *
- * @param[out] mapped_ptr Recieves a process local pointer to the shared memory.
- *
- * @retval HSA_STATUS_SUCCESS if memory is successfully imported.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is not valid, @p len is
- * incorrect, @p mapped_ptr is NULL, or some agent for which access was
- * requested can not access the shared memory.
- */
-hsa_status_t HSA_API hsa_amd_ipc_memory_attach(
- const hsa_amd_ipc_memory_t* handle, size_t len,
- uint32_t num_agents,
- const hsa_agent_t* mapping_agents,
- void** mapped_ptr);
-
-/**
- * @brief Decrements the reference count for the shared memory mapping and
- * releases access to shared memory imported with hsa_amd_ipc_memory_attach.
- *
- * @param[in] mapped_ptr Pointer to the first byte of a shared allocation
- * imported with hsa_amd_ipc_memory_attach.
- *
- * @retval HSA_STATUS_SUCCESS if @p mapped_ptr was imported with
- * hsa_amd_ipc_memory_attach.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p mapped_ptr was not imported
- * with hsa_amd_ipc_memory_attach.
- */
-hsa_status_t HSA_API hsa_amd_ipc_memory_detach(void* mapped_ptr);
-
-/**
- * @brief 256-bit process independent identifier for a ROCr IPC signal.
- */
-typedef hsa_amd_ipc_memory_t hsa_amd_ipc_signal_t;
-
-/**
- * @brief Obtains an interprocess sharing handle for a signal. The handle is
- * valid while the signal it references remains valid in any process. In
- * general applications should confirm that the signal has been attached (via
- * hsa_amd_ipc_signal_attach) in the remote process prior to destroying that
- * signal in the local process.
- * Repeated calls for the same signal may, but are not required to, return
- * unique handles.
- *
- * @param[in] signal Signal created with attribute HSA_AMD_SIGNAL_IPC.
- *
- * @param[out] handle Process independent identifier referencing the shared
- * signal.
- *
- * @retval HSA_STATUS_SUCCESS @p handle is ready to use for interprocess sharing.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is not a valid signal
- * created with attribute HSA_AMD_SIGNAL_IPC or handle is NULL.
- */
-hsa_status_t HSA_API hsa_amd_ipc_signal_create(hsa_signal_t signal, hsa_amd_ipc_signal_t* handle);
-
-/**
- * @brief Imports an IPC capable signal into the local process. If an IPC
- * signal handle is attached multiple times in a process each attach may return
- * a different signal handle. Each returned signal handle is refcounted and
- * requires a matching number of calls to hsa_signal_destroy to release the
- * shared signal.
- *
- * @param[in] handle Pointer to the identifier for the shared signal.
- *
- * @param[out] signal Recieves a process local signal handle to the shared signal.
- *
- * @retval HSA_STATUS_SUCCESS if the signal is successfully imported.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is not valid.
- */
-hsa_status_t HSA_API hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle,
- hsa_signal_t* signal);
-
-/**
- * @brief GPU system event type.
- */
-typedef enum hsa_amd_event_type_s {
- /*
- AMD GPU memory fault.
- */
- HSA_AMD_GPU_MEMORY_FAULT_EVENT = 0,
-} hsa_amd_event_type_t;
-
-/**
- * @brief Flags denoting the cause of a memory fault.
- */
-typedef enum {
- // Page not present or supervisor privilege.
- HSA_AMD_MEMORY_FAULT_PAGE_NOT_PRESENT = 1 << 0,
- // Write access to a read-only page.
- HSA_AMD_MEMORY_FAULT_READ_ONLY = 1 << 1,
- // Execute access to a page marked NX.
- HSA_AMD_MEMORY_FAULT_NX = 1 << 2,
- // GPU attempted access to a host only page.
- HSA_AMD_MEMORY_FAULT_HOST_ONLY = 1 << 3,
- // DRAM ECC failure.
- HSA_AMD_MEMORY_FAULT_DRAMECC = 1 << 4,
- // Can't determine the exact fault address.
- HSA_AMD_MEMORY_FAULT_IMPRECISE = 1 << 5,
- // SRAM ECC failure (ie registers, no fault address).
- HSA_AMD_MEMORY_FAULT_SRAMECC = 1 << 6,
- // GPU reset following unspecified hang.
- HSA_AMD_MEMORY_FAULT_HANG = 1 << 31
-} hsa_amd_memory_fault_reason_t;
-
-/**
- * @brief AMD GPU memory fault event data.
- */
-typedef struct hsa_amd_gpu_memory_fault_info_s {
- /*
- The agent where the memory fault occurred.
- */
- hsa_agent_t agent;
- /*
- Virtual address accessed.
- */
- uint64_t virtual_address;
- /*
- Bit field encoding the memory access failure reasons. There could be multiple bits set
- for one fault. Bits are defined in hsa_amd_memory_fault_reason_t.
- */
- uint32_t fault_reason_mask;
-} hsa_amd_gpu_memory_fault_info_t;
-
-/**
- * @brief AMD GPU event data passed to event handler.
- */
-typedef struct hsa_amd_event_s {
- /*
- The event type.
- */
- hsa_amd_event_type_t event_type;
- union {
- /*
- The memory fault info, only valid when @p event_type is HSA_AMD_GPU_MEMORY_FAULT_EVENT.
- */
- hsa_amd_gpu_memory_fault_info_t memory_fault;
- };
-} hsa_amd_event_t;
-
-typedef hsa_status_t (*hsa_amd_system_event_callback_t)(const hsa_amd_event_t* event, void* data);
-
-/**
- * @brief Register AMD GPU event handler.
- *
- * @param[in] callback Callback to be invoked when an event is triggered.
- * The HSA runtime passes two arguments to the callback: @p event
- * is defined per event by the HSA runtime, and @p data is the user data.
- *
- * @param[in] data User data that is passed to @p callback. May be NULL.
- *
- * @retval HSA_STATUS_SUCCESS The handler has been registered successfully.
- *
- * @retval HSA_STATUS_ERROR An event handler has already been registered.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p event is invalid.
- */
-hsa_status_t HSA_API hsa_amd_register_system_event_handler(hsa_amd_system_event_callback_t callback,
- void* data);
-
-/**
- * @brief Per-queue dispatch and wavefront scheduling priority.
- */
-typedef enum hsa_amd_queue_priority_s {
- /*
- Below normal/high priority compute and all graphics
- */
- HSA_AMD_QUEUE_PRIORITY_LOW = 0,
- /*
- Above low priority compute, below high priority compute and all graphics
- */
- HSA_AMD_QUEUE_PRIORITY_NORMAL = 1,
- /*
- Above low/normal priority compute and all graphics
- */
- HSA_AMD_QUEUE_PRIORITY_HIGH = 2,
-} hsa_amd_queue_priority_t;
-
-/**
- * @brief Modifies the dispatch and wavefront scheduling prioirty for a
- * given compute queue. The default is HSA_AMD_QUEUE_PRIORITY_NORMAL.
- *
- * @param[in] queue Compute queue to apply new priority to.
- *
- * @param[in] priority Priority to associate with queue.
- *
- * @retval HSA_STATUS_SUCCESS if priority was changed successfully.
- *
- * @retval HSA_STATUS_ERROR_INVALID_QUEUE if queue is not a valid
- * compute queue handle.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if priority is not a valid
- * value from hsa_amd_queue_priority_t.
- */
-hsa_status_t HSA_API hsa_amd_queue_set_priority(hsa_queue_t* queue,
- hsa_amd_queue_priority_t priority);
-
-/**
- * @brief Deallocation notifier function type.
- */
-typedef void (*hsa_amd_deallocation_callback_t)(void* ptr, void* user_data);
-
-/**
- * @brief Registers a deallocation notifier monitoring for release of agent
- * accessible address @p ptr. If successful, @p callback will be invoked when
- * @p ptr is removed from accessibility from all agents.
- *
- * Notification callbacks are automatically deregistered when they are invoked.
- *
- * Note: The current version supports notifications of address release
- * originating from ::hsa_amd_memory_pool_free. Support for other address
- * release APIs will follow.
- *
- * @param[in] ptr Agent accessible address to monitor for deallocation. Passed
- * to @p callback.
- *
- * @param[in] callback Notifier to be invoked when @p ptr is released from
- * agent accessibility.
- *
- * @param[in] user_data User provided value passed to @p callback. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The notifier registered successfully
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION @p ptr does not refer to a valid agent accessible
- * address.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL or @p ptr is NULL.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- */
-hsa_status_t HSA_API hsa_amd_register_deallocation_callback(void* ptr,
- hsa_amd_deallocation_callback_t callback,
- void* user_data);
-
-/**
- * @brief Removes a deallocation notifier previously registered with
- * ::hsa_amd_register_deallocation_callback. Arguments must be identical to
- * those given in ::hsa_amd_register_deallocation_callback.
- *
- * @param[in] ptr Agent accessible address which was monitored for deallocation.
- *
- * @param[in] callback Notifier to be removed.
- *
- * @retval ::HSA_STATUS_SUCCESS The notifier has been removed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The given notifier was not registered.
- */
-hsa_status_t HSA_API hsa_amd_deregister_deallocation_callback(void* ptr,
- hsa_amd_deallocation_callback_t callback);
-
-#ifdef __cplusplus
-} // end extern "C" block
-#endif
-
-#endif // header guard
diff --git a/third_party/rocm/include/hsa/hsa_ext_finalize.h b/third_party/rocm/include/hsa/hsa_ext_finalize.h
deleted file mode 100644
index 94c4582..0000000
--- a/third_party/rocm/include/hsa/hsa_ext_finalize.h
+++ /dev/null
@@ -1,531 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
-#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
-
-#include "hsa.h"
-
-#undef HSA_API
-#ifdef HSA_EXPORT_FINALIZER
-#define HSA_API HSA_API_EXPORT
-#else
-#define HSA_API HSA_API_IMPORT
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-struct BrigModuleHeader;
-typedef struct BrigModuleHeader* BrigModule_t;
-
-/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
- * @{
- */
-
-/**
- * @brief Enumeration constants added to ::hsa_status_t by this extension.
- */
-enum {
- /**
- * The HSAIL program is invalid.
- */
- HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
- /**
- * The HSAIL module is invalid.
- */
- HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
- /**
- * Machine model or profile of the HSAIL module do not match the machine model
- * or profile of the HSAIL program.
- */
- HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
- /**
- * The HSAIL module is already a part of the HSAIL program.
- */
- HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
- /**
- * Compatibility mismatch between symbol declaration and symbol definition.
- */
- HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
- /**
- * The finalization encountered an error while finalizing a kernel or
- * indirect function.
- */
- HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
- /**
- * Mismatch between a directive in the control directive structure and in
- * the HSAIL kernel.
- */
- HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
-};
-
-/** @} */
-
-/** \defgroup ext-alt-finalizer-program Finalization Program
- * @{
- */
-
-/**
- * @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
- * the definition of the BrigModule_t type.
- */
-typedef BrigModule_t hsa_ext_module_t;
-
-/**
- * @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
- * modules that collectively define functions and variables used by kernels and
- * indirect functions.
- */
-typedef struct hsa_ext_program_s {
- /**
- * Opaque handle.
- */
- uint64_t handle;
-} hsa_ext_program_t;
-
-/**
- * @brief Create an empty HSAIL program.
- *
- * @param[in] machine_model Machine model used in the HSAIL program.
- *
- * @param[in] profile Profile used in the HSAIL program.
- *
- * @param[in] default_float_rounding_mode Default float rounding mode used in
- * the HSAIL program.
- *
- * @param[in] options Vendor-specific options. May be NULL.
- *
- * @param[out] program Memory location where the HSA runtime stores the newly
- * created HSAIL program handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
- * resources required for the operation.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
- * @p profile is invalid, @p default_float_rounding_mode is invalid, or
- * @p program is NULL.
- */
-hsa_status_t HSA_API hsa_ext_program_create(
- hsa_machine_model_t machine_model,
- hsa_profile_t profile,
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
- const char *options,
- hsa_ext_program_t *program);
-
-/**
- * @brief Destroy a HSAIL program.
- *
- * @details The HSAIL program handle becomes invalid after it has been
- * destroyed. Code object handles produced by ::hsa_ext_program_finalize are
- * still valid after the HSAIL program has been destroyed, and can be used as
- * intended. Resources allocated outside and associated with the HSAIL program
- * (such as HSAIL modules that are added to the HSAIL program) can be released
- * after the finalization program has been destroyed.
- *
- * @param[in] program HSAIL program.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
- * invalid.
- */
-hsa_status_t HSA_API hsa_ext_program_destroy(
- hsa_ext_program_t program);
-
-/**
- * @brief Add a HSAIL module to an existing HSAIL program.
- *
- * @details The HSA runtime does not perform a deep copy of the HSAIL module
- * upon addition. Instead, it stores a pointer to the HSAIL module. The
- * ownership of the HSAIL module belongs to the application, which must ensure
- * that @p module is not released before destroying the HSAIL program.
- *
- * The HSAIL module is successfully added to the HSAIL program if @p module is
- * valid, if all the declarations and definitions for the same symbol are
- * compatible, and if @p module specify machine model and profile that matches
- * the HSAIL program.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] module HSAIL module. The application can add the same HSAIL module
- * to @p program at most once. The HSAIL module must specify the same machine
- * model and profile as @p program. If the floating-mode rounding mode of @p
- * module is not default, then it should match that of @p program.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
- * resources required for the operation.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
- * module does not match machine model of @p program, or the profile of @p
- * module does not match profile of @p program.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
- * already a part of the HSAIL program.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
- * definition compatibility mismatch. See the symbol compatibility rules in the
- * HSA Programming Reference Manual.
- */
-hsa_status_t HSA_API hsa_ext_program_add_module(
- hsa_ext_program_t program,
- hsa_ext_module_t module);
-
-/**
- * @brief Iterate over the HSAIL modules in a program, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] callback Callback to be invoked once per HSAIL module in the
- * program. The HSA runtime passes three arguments to the callback: the program,
- * a HSAIL module, and the application data. If @p callback returns a status
- * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
- * stops and ::hsa_ext_program_iterate_modules returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_ext_program_iterate_modules(
- hsa_ext_program_t program,
- hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
- void* data),
- void* data);
-
-/**
- * @brief HSAIL program attributes.
- */
-typedef enum {
- /**
- * Machine model specified when the HSAIL program was created. The type
- * of this attribute is ::hsa_machine_model_t.
- */
- HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
- /**
- * Profile specified when the HSAIL program was created. The type of
- * this attribute is ::hsa_profile_t.
- */
- HSA_EXT_PROGRAM_INFO_PROFILE = 1,
- /**
- * Default float rounding mode specified when the HSAIL program was
- * created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
- */
- HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
-} hsa_ext_program_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given HSAIL program.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behaviour is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * HSAIL program attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_ext_program_get_info(
- hsa_ext_program_t program,
- hsa_ext_program_info_t attribute,
- void *value);
-
-/**
- * @brief Finalizer-determined call convention.
- */
-typedef enum {
- /**
- * Finalizer-determined call convention.
- */
- HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
-} hsa_ext_finalizer_call_convention_t;
-
-/**
- * @brief Control directives specify low-level information about the
- * finalization process.
- */
-typedef struct hsa_ext_control_directives_s {
- /**
- * Bitset indicating which control directives are enabled. The bit assigned to
- * a control directive is determined by the corresponding value in
- * BrigControlDirective.
- *
- * If a control directive is disabled, its corresponding field value (if any)
- * must be 0. Control directives that are only present or absent (such as
- * partial workgroups) have no corresponding field as the presence of the bit
- * in this mask is sufficient.
- */
- uint64_t control_directives_mask;
- /**
- * Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
- * assigned to an HSAIL exception is determined by the corresponding value
- * in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
- * control directive, the finalizer uses the union of the two masks.
- */
- uint16_t break_exceptions_mask;
- /**
- * Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
- * bit assigned to an HSAIL exception is determined by the corresponding value
- * in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
- * control directive, the finalizer uses the union of the two masks.
- */
- uint16_t detect_exceptions_mask;
- /**
- * Maximum size (in bytes) of dynamic group memory that will be allocated by
- * the application for any dispatch of the kernel. If the kernel contains a
- * maxdynamicsize control directive, the two values should match.
- */
- uint32_t max_dynamic_group_size;
- /**
- * Maximum number of grid work-items that will be used by the application to
- * launch the kernel. If the kernel contains a maxflatgridsize control
- * directive, the value of @a max_flat_grid_size must not be greater than the
- * value of the directive, and takes precedence.
- *
- * The value specified for maximum absolute grid size must be greater than or
- * equal to the product of the values specified by @a required_grid_size.
- *
- * If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
- * control_directives_mask, this field must be greater than 0.
- */
- uint64_t max_flat_grid_size;
- /**
- * Maximum number of work-group work-items that will be used by the
- * application to launch the kernel. If the kernel contains a
- * maxflatworkgroupsize control directive, the value of @a
- * max_flat_workgroup_size must not be greater than the value of the
- * directive, and takes precedence.
- *
- * The value specified for maximum absolute grid size must be greater than or
- * equal to the product of the values specified by @a required_workgroup_size.
- *
- * If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
- * control_directives_mask, this field must be greater than 0.
- */
- uint32_t max_flat_workgroup_size;
- /**
- * Reserved. Must be 0.
- */
- uint32_t reserved1;
- /**
- * Grid size that will be used by the application in any dispatch of the
- * kernel. If the kernel contains a requiredgridsize control directive, the
- * dimensions should match.
- *
- * The specified grid size must be consistent with @a required_workgroup_size
- * and @a required_dim. Also, the product of the three dimensions must not
- * exceed @a max_flat_grid_size. Note that the listed invariants must hold
- * only if all the corresponding control directives are enabled.
- *
- * If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
- * control_directives_mask, the three dimension values must be greater than 0.
- */
- uint64_t required_grid_size[3];
- /**
- * Work-group size that will be used by the application in any dispatch of the
- * kernel. If the kernel contains a requiredworkgroupsize control directive,
- * the dimensions should match.
- *
- * The specified work-group size must be consistent with @a required_grid_size
- * and @a required_dim. Also, the product of the three dimensions must not
- * exceed @a max_flat_workgroup_size. Note that the listed invariants must
- * hold only if all the corresponding control directives are enabled.
- *
- * If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
- * control_directives_mask, the three dimension values must be greater than 0.
- */
- hsa_dim3_t required_workgroup_size;
- /**
- * Number of dimensions that will be used by the application to launch the
- * kernel. If the kernel contains a requireddim control directive, the two
- * values should match.
- *
- * The specified dimensions must be consistent with @a required_grid_size and
- * @a required_workgroup_size. This invariant must hold only if all the
- * corresponding control directives are enabled.
- *
- * If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
- * control_directives_mask, this field must be 1, 2, or 3.
- */
- uint8_t required_dim;
- /**
- * Reserved. Must be 0.
- */
- uint8_t reserved2[75];
-} hsa_ext_control_directives_t;
-
-/**
- * @brief Finalize an HSAIL program for a given instruction set architecture.
- *
- * @details Finalize all of the kernels and indirect functions that belong to
- * the same HSAIL program for a specific instruction set architecture (ISA). The
- * transitive closure of all functions specified by call or scall must be
- * defined. Kernels and indirect functions that are being finalized must be
- * defined. Kernels and indirect functions that are referenced in kernels and
- * indirect functions being finalized may or may not be defined, but must be
- * declared. All the global/readonly segment variables that are referenced in
- * kernels and indirect functions being finalized may or may not be defined, but
- * must be declared.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] isa Instruction set architecture to finalize for.
- *
- * @param[in] call_convention A call convention used in a finalization. Must
- * have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
- * and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
- * isa (not inclusive).
- *
- * @param[in] control_directives Low-level control directives that influence
- * the finalization process.
- *
- * @param[in] options Vendor-specific options. May be NULL.
- *
- * @param[in] code_object_type Type of code object to produce.
- *
- * @param[out] code_object Code object generated by the Finalizer, which
- * contains the machine code for the kernels and indirect functions in the HSAIL
- * program. The code object is independent of the HSAIL module that was used to
- * generate it.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
- * resources required for the operation.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
- * the control directive structure and in the HSAIL kernel mismatch, or if the
- * same directive is used with a different value in one of the functions used by
- * this kernel.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
- * encountered an error while compiling a kernel or an indirect function.
- */
-hsa_status_t HSA_API hsa_ext_program_finalize(
- hsa_ext_program_t program,
- hsa_isa_t isa,
- int32_t call_convention,
- hsa_ext_control_directives_t control_directives,
- const char *options,
- hsa_code_object_type_t code_object_type,
- hsa_code_object_t *code_object);
-
-/** @} */
-
-#define hsa_ext_finalizer_1_00
-
-typedef struct hsa_ext_finalizer_1_00_pfn_s {
- hsa_status_t (*hsa_ext_program_create)(
- hsa_machine_model_t machine_model, hsa_profile_t profile,
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
- const char *options, hsa_ext_program_t *program);
-
- hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
-
- hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
- hsa_ext_module_t module);
-
- hsa_status_t (*hsa_ext_program_iterate_modules)(
- hsa_ext_program_t program,
- hsa_status_t (*callback)(hsa_ext_program_t program,
- hsa_ext_module_t module, void *data),
- void *data);
-
- hsa_status_t (*hsa_ext_program_get_info)(
- hsa_ext_program_t program, hsa_ext_program_info_t attribute,
- void *value);
-
- hsa_status_t (*hsa_ext_program_finalize)(
- hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
- hsa_ext_control_directives_t control_directives, const char *options,
- hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
-} hsa_ext_finalizer_1_00_pfn_t;
-
-#ifdef __cplusplus
-} // extern "C" block
-#endif // __cplusplus
-
-#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
diff --git a/third_party/rocm/include/hsa/hsa_ext_image.h b/third_party/rocm/include/hsa/hsa_ext_image.h
deleted file mode 100644
index b25f168..0000000
--- a/third_party/rocm/include/hsa/hsa_ext_image.h
+++ /dev/null
@@ -1,1454 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_EXT_IMAGE_H
-#define HSA_EXT_IMAGE_H
-
-#include "hsa.h"
-
-#undef HSA_API
-#ifdef HSA_EXPORT_IMAGES
-#define HSA_API HSA_API_EXPORT
-#else
-#define HSA_API HSA_API_IMPORT
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /*__cplusplus*/
-
-/** \defgroup ext-images Images and Samplers
- * @{
- */
-
-/**
- * @brief Enumeration constants added to ::hsa_status_t by this extension.
- *
- * @remark Additions to hsa_status_t
- */
-enum {
- /**
- * Image format is not supported.
- */
- HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED = 0x3000,
- /**
- * Image size is not supported.
- */
- HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED = 0x3001,
- /**
- * Image pitch is not supported or invalid.
- */
- HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED = 0x3002,
- /**
- * Sampler descriptor is not supported or invalid.
- */
- HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED = 0x3003
-};
-
-/**
- * @brief Enumeration constants added to ::hsa_agent_info_t by this
- * extension.
- *
- * @remark Additions to hsa_agent_info_t
- */
-enum {
- /**
- * Maximum number of elements in 1D images. Must be at least 16384. The type
- * of this attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS = 0x3000,
- /**
- * Maximum number of elements in 1DA images. Must be at least 16384. The type
- * of this attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS = 0x3001,
- /**
- * Maximum number of elements in 1DB images. Must be at least 65536. The type
- * of this attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS = 0x3002,
- /**
- * Maximum dimensions (width, height) of 2D images, in image elements. The X
- * and Y maximums must be at least 16384. The type of this attribute is
- * size_t[2].
- */
- HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS = 0x3003,
- /**
- * Maximum dimensions (width, height) of 2DA images, in image elements. The X
- * and Y maximums must be at least 16384. The type of this attribute is
- * size_t[2].
- */
- HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS = 0x3004,
- /**
- * Maximum dimensions (width, height) of 2DDEPTH images, in image
- * elements. The X and Y maximums must be at least 16384. The type of this
- * attribute is size_t[2].
- */
- HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS = 0x3005,
- /**
- * Maximum dimensions (width, height) of 2DADEPTH images, in image
- * elements. The X and Y maximums must be at least 16384. The type of this
- * attribute is size_t[2].
- */
- HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS = 0x3006,
- /**
- * Maximum dimensions (width, height, depth) of 3D images, in image
- * elements. The maximum along any dimension must be at least 2048. The type
- * of this attribute is size_t[3].
- */
- HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS = 0x3007,
- /**
- * Maximum number of image layers in a image array. Must be at least 2048. The
- * type of this attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS = 0x3008,
- /**
- * Maximum number of read-only image handles that can be created for an agent at any one
- * time. Must be at least 128. The type of this attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES = 0x3009,
- /**
- * Maximum number of write-only and read-write image handles (combined) that
- * can be created for an agent at any one time. Must be at least 64. The type of this
- * attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES = 0x300A,
- /**
- * Maximum number of sampler handlers that can be created for an agent at any one
- * time. Must be at least 16. The type of this attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS = 0x300B,
- /**
- * Image pitch alignment. The agent only supports linear image data
- * layouts with a row pitch that is a multiple of this value. Must be
- * a power of 2. The type of this attribute is size_t.
- */
- HSA_EXT_AGENT_INFO_IMAGE_LINEAR_ROW_PITCH_ALIGNMENT = 0x300C
-};
-
-/**
- * @brief Image handle, populated by ::hsa_ext_image_create or
- * ::hsa_ext_image_create_with_layout. Image
- * handles are only unique within an agent, not across agents.
- *
- */
-typedef struct hsa_ext_image_s {
- /**
- * Opaque handle. For a given agent, two handles reference the same object of
- * the enclosing type if and only if they are equal.
- */
- uint64_t handle;
-
-} hsa_ext_image_t;
-
-/**
- * @brief Geometry associated with the image. This specifies the
- * number of image dimensions and whether the image is an image
- * array. See the <em>Image Geometry</em> section in the <em>HSA
- * Programming Reference Manual</em> for definitions on each
- * geometry. The enumeration values match the BRIG type @p
- * hsa_ext_brig_image_geometry_t.
- */
-typedef enum {
-/**
- * One-dimensional image addressed by width coordinate.
- */
- HSA_EXT_IMAGE_GEOMETRY_1D = 0,
-
- /**
- * Two-dimensional image addressed by width and height coordinates.
- */
- HSA_EXT_IMAGE_GEOMETRY_2D = 1,
-
- /**
- * Three-dimensional image addressed by width, height, and depth coordinates.
- */
- HSA_EXT_IMAGE_GEOMETRY_3D = 2,
-
- /**
- * Array of one-dimensional images with the same size and format. 1D arrays
- * are addressed by width and index coordinate.
- */
- HSA_EXT_IMAGE_GEOMETRY_1DA = 3,
-
- /**
- * Array of two-dimensional images with the same size and format. 2D arrays
- * are addressed by width, height, and index coordinates.
- */
- HSA_EXT_IMAGE_GEOMETRY_2DA = 4,
-
- /**
- * One-dimensional image addressed by width coordinate. It has
- * specific restrictions compared to ::HSA_EXT_IMAGE_GEOMETRY_1D. An
- * image with an opaque image data layout will always use a linear
- * image data layout, and one with an explicit image data layout
- * must specify ::HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR.
- */
- HSA_EXT_IMAGE_GEOMETRY_1DB = 5,
-
- /**
- * Two-dimensional depth image addressed by width and height coordinates.
- */
- HSA_EXT_IMAGE_GEOMETRY_2DDEPTH = 6,
-
- /**
- * Array of two-dimensional depth images with the same size and format. 2D
- * arrays are addressed by width, height, and index coordinates.
- */
- HSA_EXT_IMAGE_GEOMETRY_2DADEPTH = 7
-} hsa_ext_image_geometry_t;
-
-/**
- * @brief Channel type associated with the elements of an image. See
- * the <em>Channel Type</em> section in the <em>HSA Programming Reference
- * Manual</em> for definitions on each channel type. The
- * enumeration values and definition match the BRIG type @p
- * hsa_ext_brig_image_channel_type_t.
- */
-typedef enum {
- HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
- HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15
-} hsa_ext_image_channel_type_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_image_channel_type_t constants.
- */
-typedef uint32_t hsa_ext_image_channel_type32_t;
-
-/**
- *
- * @brief Channel order associated with the elements of an image. See
- * the <em>Channel Order</em> section in the <em>HSA Programming Reference
- * Manual</em> for definitions on each channel order. The
- * enumeration values match the BRIG type @p
- * hsa_ext_brig_image_channel_order_t.
- */
-typedef enum {
- HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0,
- HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8,
- HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9,
- HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10,
- HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15,
- HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
- HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
- HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18,
- HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
-} hsa_ext_image_channel_order_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_image_channel_order_t constants.
- */
-typedef uint32_t hsa_ext_image_channel_order32_t;
-
-
-/**
- * @brief Image format.
- */
-typedef struct hsa_ext_image_format_s {
- /**
- * Channel type.
- */
- hsa_ext_image_channel_type32_t channel_type;
-
- /**
- * Channel order.
- */
- hsa_ext_image_channel_order32_t channel_order;
-} hsa_ext_image_format_t;
-
-/**
- * @brief Implementation independent image descriptor.
- */
-typedef struct hsa_ext_image_descriptor_s {
- /**
- * Image geometry.
- */
- hsa_ext_image_geometry_t geometry;
- /**
- * Width of the image, in components.
- */
- size_t width;
- /**
- * Height of the image, in components. Only used if the geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_2D, ::HSA_EXT_IMAGE_GEOMETRY_3D,
- * HSA_EXT_IMAGE_GEOMETRY_2DA, HSA_EXT_IMAGE_GEOMETRY_2DDEPTH, or
- * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
- */
- size_t height;
- /**
- * Depth of the image, in components. Only used if the geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_3D, otherwise must be 0.
- */
- size_t depth;
- /**
- * Number of image layers in the image array. Only used if the geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
- */
- size_t array_size;
- /**
- * Image format.
- */
- hsa_ext_image_format_t format;
-} hsa_ext_image_descriptor_t;
-
-/**
- * @brief Image capability.
- */
-typedef enum {
- /**
- * Images of this geometry, format, and layout are not supported by
- * the agent.
- */
- HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED = 0x0,
- /**
- * Read-only images of this geometry, format, and layout are
- * supported by the agent.
- */
- HSA_EXT_IMAGE_CAPABILITY_READ_ONLY = 0x1,
- /**
- * Write-only images of this geometry, format, and layout are
- * supported by the agent.
- */
- HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY = 0x2,
- /**
- * Read-write images of this geometry, format, and layout are
- * supported by the agent.
- */
- HSA_EXT_IMAGE_CAPABILITY_READ_WRITE = 0x4,
- /**
- * @deprecated Images of this geometry, format, and layout can be accessed from
- * read-modify-write atomic operations in the agent.
- */
- HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE = 0x8,
- /**
- * Images of this geometry, format, and layout are guaranteed to
- * have a consistent data layout regardless of how they are
- * accessed by the associated agent.
- */
- HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT = 0x10
-} hsa_ext_image_capability_t;
-
-/**
- * @brief Image data layout.
- *
- * @details An image data layout denotes such aspects of image data
- * layout as tiling and organization of channels in memory. Some image
- * data layouts may only apply to specific image geometries, formats,
- * and access permissions. Different agents may support different
- * image layout identifiers, including vendor specific layouts. Note
- * that an agent may not support the same image data layout for
- * different access permissions to images with the same image
- * geometry, size, and format. If multiple agents support the same
- * image data layout then it is possible to use separate image handles
- * for each agent that references the same image data.
- */
-
-typedef enum {
- /**
- * An implementation specific opaque image data layout which can
- * vary depending on the agent, geometry, image format, image size,
- * and access permissions.
- */
- HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE = 0x0,
- /**
- * The image data layout is specified by the following rules in
- * ascending byte address order. For a 3D image, 2DA image array,
- * or 1DA image array, the image data is stored as a linear sequence
- * of adjacent 2D image slices, 2D images, or 1D images
- * respectively, spaced according to the slice pitch. Each 2D image
- * is stored as a linear sequence of adjacent image rows, spaced
- * according to the row pitch. Each 1D or 1DB image is stored as a
- * single image row. Each image row is stored as a linear sequence
- * of image elements. Each image element is stored as a linear
- * sequence of image components specified by the left to right
- * channel order definition. Each image component is stored using
- * the memory type specified by the channel type.
- *
- * The 1DB image geometry always uses the linear image data layout.
- */
- HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR = 0x1
-} hsa_ext_image_data_layout_t;
-
-/**
- * @brief Retrieve the supported image capabilities for a given combination of
- * agent, geometry, and image format for an image created with an opaque image
- * data layout.
- *
- * @param[in] agent Agent to be associated with the image handle.
- *
- * @param[in] geometry Geometry.
- *
- * @param[in] image_format Pointer to an image format. Must not be NULL.
- *
- * @param[out] capability_mask Pointer to a memory location where the HSA
- * runtime stores a bit-mask of supported image capability
- * (::hsa_ext_image_capability_t) values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_format is
- * NULL, or @p capability_mask is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_get_capability(
- hsa_agent_t agent,
- hsa_ext_image_geometry_t geometry,
- const hsa_ext_image_format_t *image_format,
- uint32_t *capability_mask);
-
-/**
- * @brief Retrieve the supported image capabilities for a given combination of
- * agent, geometry, image format, and image layout for an image created with
- * an explicit image data layout.
- *
- * @param[in] agent Agent to be associated with the image handle.
- *
- * @param[in] geometry Geometry.
- *
- * @param[in] image_format Pointer to an image format. Must not be NULL.
- *
- * @param[in] image_data_layout The image data layout.
- * It is invalid to use ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use
- * ::hsa_ext_image_get_capability instead.
- *
- * @param[out] capability_mask Pointer to a memory location where the HSA
- * runtime stores a bit-mask of supported image capability
- * (::hsa_ext_image_capability_t) values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_format is
- * NULL, @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
- * or @p capability_mask is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_get_capability_with_layout(
- hsa_agent_t agent,
- hsa_ext_image_geometry_t geometry,
- const hsa_ext_image_format_t *image_format,
- hsa_ext_image_data_layout_t image_data_layout,
- uint32_t *capability_mask);
-
-/**
- * @brief Agent specific image size and alignment requirements, populated by
- * ::hsa_ext_image_data_get_info and ::hsa_ext_image_data_get_info_with_layout.
- */
-typedef struct hsa_ext_image_data_info_s {
- /**
- * Image data size, in bytes.
- */
- size_t size;
-
- /**
- * Image data alignment, in bytes. Must always be a power of 2.
- */
- size_t alignment;
-
-} hsa_ext_image_data_info_t;
-
-/**
- * @brief Retrieve the image data requirements for a given combination of agent, image
- * descriptor, and access permission for an image created with an opaque image
- * data layout.
- *
- * @details The optimal image data size and alignment requirements may
- * vary depending on the image attributes specified in @p
- * image_descriptor, the @p access_permission, and the @p agent. Also,
- * different implementations of the HSA runtime may return different
- * requirements for the same input values.
- *
- * The implementation must return the same image data requirements for
- * different access permissions with matching image descriptors as long
- * as ::hsa_ext_image_get_capability reports
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT. Image
- * descriptors match if they have the same values, with the exception
- * that s-form channel orders match the corresponding non-s-form
- * channel order and vice versa.
- *
- * @param[in] agent Agent to be associated with the image handle.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by @p agent. The access permission defines how the agent
- * is allowed to access the image and must match the corresponding
- * HSAIL image handle type. The @p agent must support the image format
- * specified in @p image_descriptor for the given @p
- * access_permission.
- *
- * @param[out] image_data_info Memory location where the runtime stores the
- * size and alignment requirements. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The @p
- * agent does not support the image format specified by @p
- * image_descriptor with the specified @p access_permission.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
- * does not support the image dimensions specified by @p
- * image_descriptor with the specified @p access_permission.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
- * access_permission is not a valid access permission value, or @p
- * image_data_info is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_data_get_info(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_info_t *image_data_info);
-
-/**
- * @brief Retrieve the image data requirements for a given combination of
- * image descriptor, access permission, image data layout, image data row pitch,
- * and image data slice pitch for an image created with an explicit image
- * data layout.
- *
- * @details The image data size and alignment requirements may vary
- * depending on the image attributes specified in @p image_descriptor,
- * the @p access_permission, and the image layout. However, different
- * implementations of the HSA runtime will return the same
- * requirements for the same input values.
- *
- * The implementation must return the same image data requirements for
- * different access permissions with matching image descriptors and
- * matching image layouts as long as ::hsa_ext_image_get_capability
- * reports
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT. Image
- * descriptors match if they have the same values, with the exception
- * that s-form channel orders match the corresponding non-s-form
- * channel order and vice versa. Image layouts match if they are the
- * same image data layout and use the same image row and slice pitch
- * values.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by an agent. The access permission defines how the agent
- * is allowed to access the image and must match the corresponding
- * HSAIL image handle type.
- *
- * @param[in] image_data_layout The image data layout to use.
- * It is invalid to use ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use
- * ::hsa_ext_image_data_get_info instead.
- *
- * @param[in] image_data_row_pitch The size in bytes for a single row
- * of the image in the image data. If 0 is specified then the default
- * row pitch value is used: image width * image element byte size.
- * The value used must be greater than or equal to the default row
- * pitch, and be a multiple of the image element byte size. For the
- * linear image layout it must also be a multiple of the image linear
- * row pitch alignment for the agents that will access the image data
- * using image instructions.
- *
- * @param[in] image_data_slice_pitch The size in bytes of a single
- * slice of a 3D image, or the size in bytes of each image layer in an
- * image array in the image data. If 0 is specified then the default
- * slice pitch value is used: row pitch * height if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH; row pitch if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_1DA; and 0 otherwise. The value used must
- * be 0 if the default slice pitch is 0, be greater than or equal to
- * the default slice pitch, and be a multiple of the row pitch.
- *
- * @param[out] image_data_info Memory location where the runtime stores the
- * size and alignment requirements. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The image
- * format specified by @p image_descriptor is not supported for the
- * @p access_permission and @p image_data_layout specified.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The image
- * dimensions specified by @p image_descriptor are not supported for
- * the @p access_permission and @p image_data_layout specified.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED The row and
- * slice pitch specified by @p image_data_row_pitch and @p
- * image_data_slice_pitch are invalid or not supported.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is
- * NULL, @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
- * or @p image_data_info is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_data_get_info_with_layout(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_layout_t image_data_layout,
- size_t image_data_row_pitch,
- size_t image_data_slice_pitch,
- hsa_ext_image_data_info_t *image_data_info);
-
-/**
- * @brief Creates an agent specific image handle to an image with an
- * opaque image data layout.
- *
- * @details Images with an opaque image data layout created with
- * different access permissions but matching image descriptors and
- * same agent can share the same image data if
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported
- * by ::hsa_ext_image_get_capability for the image format specified in
- * the image descriptor. Image descriptors match if they have the same
- * values, with the exception that s-form channel orders match the
- * corresponding non-s-form channel order and vice versa.
- *
- * If necessary, an application can use image operations (import,
- * export, copy, clear) to prepare the image for the intended use
- * regardless of the access permissions.
- *
- * @param[in] agent agent to be associated with the image handle created.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] image_data Image data buffer that must have been allocated
- * according to the size and alignment requirements dictated by
- * ::hsa_ext_image_data_get_info. Must not be NULL.
- *
- * Any previous memory contents are preserved upon creation. The application is
- * responsible for ensuring that the lifetime of the image data exceeds that of
- * all the associated images.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by agent. The access permission defines how the agent
- * is allowed to access the image using the image handle created and
- * must match the corresponding HSAIL image handle type. The agent
- * must support the image format specified in @p image_descriptor for
- * the given @p access_permission.
- *
- * @param[out] image Pointer to a memory location where the HSA runtime stores
- * the newly created image handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent
- * does not have the capability to support the image format contained
- * in @p image_descriptor using the specified @p access_permission.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
- * does not support the image dimensions specified by @p
- * image_descriptor using the specified @p access_permission.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * support the creation of more image handles with the given @p access_permission).
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
- * image_data is NULL, @p image_data does not have a valid alignment,
- * @p access_permission is not a valid access permission
- * value, or @p image is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_create(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- const void *image_data,
- hsa_access_permission_t access_permission,
- hsa_ext_image_t *image);
-
-/**
- * @brief Creates an agent specific image handle to an image with an explicit
- * image data layout.
- *
- * @details Images with an explicit image data layout created with
- * different access permissions but matching image descriptors and
- * matching image layout can share the same image data if
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported
- * by ::hsa_ext_image_get_capability_with_layout for the image format
- * specified in the image descriptor and specified image data
- * layout. Image descriptors match if they have the same values, with
- * the exception that s-form channel orders match the corresponding
- * non-s-form channel order and vice versa. Image layouts match if
- * they are the same image data layout and use the same image row and
- * slice values.
- *
- * If necessary, an application can use image operations (import, export, copy,
- * clear) to prepare the image for the intended use regardless of the access
- * permissions.
- *
- * @param[in] agent agent to be associated with the image handle created.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] image_data Image data buffer that must have been allocated
- * according to the size and alignment requirements dictated by
- * ::hsa_ext_image_data_get_info_with_layout. Must not be NULL.
- *
- * Any previous memory contents are preserved upon creation. The application is
- * responsible for ensuring that the lifetime of the image data exceeds that of
- * all the associated images.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by the agent. The access permission defines how the agent
- * is allowed to access the image and must match the corresponding
- * HSAIL image handle type. The agent must support the image format
- * specified in @p image_descriptor for the given @p access_permission
- * and @p image_data_layout.
- *
- * @param[in] image_data_layout The image data layout to use for the
- * @p image_data. It is invalid to use
- * ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use ::hsa_ext_image_create
- * instead.
- *
- * @param[in] image_data_row_pitch The size in bytes for a single row
- * of the image in the image data. If 0 is specified then the default
- * row pitch value is used: image width * image element byte size.
- * The value used must be greater than or equal to the default row
- * pitch, and be a multiple of the image element byte size. For the
- * linear image layout it must also be a multiple of the image linear
- * row pitch alignment for the agents that will access the image data
- * using image instructions.
- *
- * @param[in] image_data_slice_pitch The size in bytes of a single
- * slice of a 3D image, or the size in bytes of each image layer in an
- * image array in the image data. If 0 is specified then the default
- * slice pitch value is used: row pitch * height if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH; row pitch if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_1DA; and 0 otherwise. The value used must
- * be 0 if the default slice pitch is 0, be greater than or equal to
- * the default slice pitch, and be a multiple of the row pitch.
- *
- * @param[out] image Pointer to a memory location where the HSA runtime stores
- * the newly created image handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does
- * not have the capability to support the image format contained in the image
- * descriptor using the specified @p access_permission and @p image_data_layout.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
- * does not support the image dimensions specified by @p
- * image_descriptor using the specified @p access_permission and @p
- * image_data_layout.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED The agent does
- * not support the row and slice pitch specified by @p image_data_row_pitch
- * and @p image_data_slice_pitch, or the values are invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * support the creation of more image handles with the given @p access_permission).
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
- * image_data is NULL, @p image_data does not have a valid alignment,
- * @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
- * or @p image is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_create_with_layout(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- const void *image_data,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_layout_t image_data_layout,
- size_t image_data_row_pitch,
- size_t image_data_slice_pitch,
- hsa_ext_image_t *image);
-
-/**
- * @brief Destroy an image handle previously created using ::hsa_ext_image_create or
- * ::hsa_ext_image_create_with_layout.
- *
- * @details Destroying the image handle does not free the associated image data,
- * or modify its contents. The application should not destroy an image handle while
- * there are references to it queued for execution or currently being used in a
- * kernel dispatch.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] image Image handle to destroy.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- */
-hsa_status_t HSA_API hsa_ext_image_destroy(
- hsa_agent_t agent,
- hsa_ext_image_t image);
-
-/**
- * @brief Copies a portion of one image (the source) to another image (the
- * destination).
- *
- * @details The source and destination image formats should be the
- * same, with the exception that s-form channel orders match the
- * corresponding non-s-form channel order and vice versa. For example,
- * it is allowed to copy a source image with a channel order of
- * HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB to a destination image with a
- * channel order of HSA_EXT_IMAGE_CHANNEL_ORDER_RGB.
- *
- * The source and destination images do not have to be of the same geometry and
- * appropriate scaling is performed by the HSA runtime. It is possible to copy
- * subregions between any combinations of source and destination geometries, provided
- * that the dimensions of the subregions are the same. For example, it is
- * allowed to copy a rectangular region from a 2D image to a slice of a 3D
- * image.
- *
- * If the source and destination image data overlap, or the combination of
- * offset and range references an out-out-bounds element in any of the images,
- * the behavior is undefined.
- *
- * @param[in] agent Agent associated with both the source and destination image handles.
- *
- * @param[in] src_image Image handle of source image. The agent associated with the source
- * image handle must be identical to that of the destination image.
- *
- * @param[in] src_offset Pointer to the offset within the source image where to
- * copy the data from. Must not be NULL.
- *
- * @param[in] dst_image Image handle of destination image.
- *
- * @param[in] dst_offset Pointer to the offset within the destination
- * image where to copy the data. Must not be NULL.
- *
- * @param[in] range Dimensions of the image portion to be copied. The HSA
- * runtime computes the size of the image data to be copied using this
- * argument. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_offset is
- * NULL, @p dst_offset is NULL, or @p range is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_copy(
- hsa_agent_t agent,
- hsa_ext_image_t src_image,
- const hsa_dim3_t* src_offset,
- hsa_ext_image_t dst_image,
- const hsa_dim3_t* dst_offset,
- const hsa_dim3_t* range);
-
-/**
- * @brief Image region.
- */
-typedef struct hsa_ext_image_region_s {
- /**
- * Offset within an image (in coordinates).
- */
- hsa_dim3_t offset;
-
- /**
- * Dimension size of the image range (in coordinates). The x, y, and z dimensions
- * correspond to width, height, and depth or index respectively.
- */
- hsa_dim3_t range;
-} hsa_ext_image_region_t;
-
-/**
- * @brief Import a linearly organized image data from memory directly to an
- * image handle.
- *
- * @details This operation updates the image data referenced by the image handle
- * from the source memory. The size of the data imported from memory is
- * implicitly derived from the image region.
- *
- * It is the application's responsibility to avoid out of bounds memory access.
- *
- * None of the source memory or destination image data memory can
- * overlap. Overlapping of any of the source and destination image
- * data memory within the import operation produces undefined results.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] src_memory Source memory. Must not be NULL.
- *
- * @param[in] src_row_pitch The size in bytes of a single row of the image in the
- * source memory. If the value is smaller than the destination image region
- * width * image element byte size, then region width * image element byte
- * size is used.
- *
- * @param[in] src_slice_pitch The size in bytes of a single 2D slice of a 3D image,
- * or the size in bytes of each image layer in an image array in the source memory.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_1DA and the value is smaller than the
- * value used for @p src_row_pitch, then the value used for @p src_row_pitch is used.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH and the value is smaller than the value used for
- * @p src_row_pitch * destination image region height, then the value used for
- * @p src_row_pitch * destination image region height is used.
- * Otherwise, the value is not used.
- *
- * @param[in] dst_image Image handle of destination image.
- *
- * @param[in] image_region Pointer to the image region to be updated. Must not
- * be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_memory is NULL, or @p
- * image_region is NULL.
- *
- */
-hsa_status_t HSA_API hsa_ext_image_import(
- hsa_agent_t agent,
- const void *src_memory,
- size_t src_row_pitch,
- size_t src_slice_pitch,
- hsa_ext_image_t dst_image,
- const hsa_ext_image_region_t *image_region);
-
-/**
- * @brief Export the image data to linearly organized memory.
- *
- * @details The operation updates the destination memory with the image data of
- * @p src_image. The size of the data exported to memory is implicitly derived
- * from the image region.
- *
- * It is the application's responsibility to avoid out of bounds memory access.
- *
- * None of the destination memory or source image data memory can
- * overlap. Overlapping of any of the source and destination image
- * data memory within the export operation produces undefined results.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] src_image Image handle of source image.
- *
- * @param[in] dst_memory Destination memory. Must not be NULL.
- *
- * @param[in] dst_row_pitch The size in bytes of a single row of the image in the
- * destination memory. If the value is smaller than the source image region
- * width * image element byte size, then region width * image element byte
- * size is used.
- *
- * @param[in] dst_slice_pitch The size in bytes of a single 2D slice of a 3D image,
- * or the size in bytes of each image in an image array in the destination memory.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_1DA and the value is smaller than the
- * value used for @p dst_row_pitch, then the value used for @p dst_row_pitch is used.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH and the value is smaller than the value used for
- * @p dst_row_pitch * source image region height, then the value used for
- * @p dst_row_pitch * source image region height is used.
- * Otherwise, the value is not used.
- *
- * @param[in] image_region Pointer to the image region to be exported. Must not
- * be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p dst_memory is NULL, or @p
- * image_region is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_export(
- hsa_agent_t agent,
- hsa_ext_image_t src_image,
- void *dst_memory,
- size_t dst_row_pitch,
- size_t dst_slice_pitch,
- const hsa_ext_image_region_t *image_region);
-
-/**
- * @brief Clear a region of an image so that every image element has
- * the specified value.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] image Image handle for image to be cleared.
- *
- * @param[in] data The value to which to set each image element being
- * cleared. It is specified as an array of image component values. The
- * number of array elements must match the number of access components
- * for the image channel order. The type of each array element must
- * match the image access type of the image channel type. When the
- * value is used to set the value of an image element, the conversion
- * method corresponding to the image channel type is used. See the
- * <em>Channel Order</em> section and <em>Channel Type</em> section in
- * the <em>HSA Programming Reference Manual</em> for more
- * information. Must not be NULL.
- *
- * @param[in] image_region Pointer to the image region to clear. Must not be
- * NULL. If the region references an out-out-bounds element, the behavior is
- * undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p data is NULL, or @p
- * image_region is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_clear(
- hsa_agent_t agent,
- hsa_ext_image_t image,
- const void* data,
- const hsa_ext_image_region_t *image_region);
-
-/**
- * @brief Sampler handle. Samplers are populated by
- * ::hsa_ext_sampler_create. Sampler handles are only unique within an
- * agent, not across agents.
- */
-typedef struct hsa_ext_sampler_s {
- /**
- * Opaque handle. For a given agent, two handles reference the same object of
- * the enclosing type if and only if they are equal.
- */
- uint64_t handle;
-} hsa_ext_sampler_t;
-
-/**
- * @brief Sampler address modes. The sampler address mode describes
- * the processing of out-of-range image coordinates. See the
- * <em>Addressing Mode</em> section in the <em>HSA Programming Reference
- * Manual</em> for definitions on each address mode. The values
- * match the BRIG type @p hsa_ext_brig_sampler_addressing_t.
- */
-typedef enum {
- /**
- * Out-of-range coordinates are not handled.
- */
- HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED = 0,
-
- /**
- * Clamp out-of-range coordinates to the image edge.
- */
- HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 1,
-
- /**
- * Clamp out-of-range coordinates to the image border color.
- */
- HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER = 2,
-
- /**
- * Wrap out-of-range coordinates back into the valid coordinate
- * range so the image appears as repeated tiles.
- */
- HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT = 3,
-
- /**
- * Mirror out-of-range coordinates back into the valid coordinate
- * range so the image appears as repeated tiles with every other
- * tile a reflection.
- */
- HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 4
-
-} hsa_ext_sampler_addressing_mode_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_sampler_addressing_mode_t constants.
- */
-typedef uint32_t hsa_ext_sampler_addressing_mode32_t;
-
-/**
- * @brief Sampler coordinate normalization modes. See the
- * <em>Coordinate Normalization Mode</em> section in the <em>HSA
- * Programming Reference Manual</em> for definitions on each
- * coordinate normalization mode. The values match the BRIG type @p
- * hsa_ext_brig_sampler_coord_normalization_t.
- */
-typedef enum {
-
- /**
- * Coordinates are used to directly address an image element.
- */
- HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED = 0,
-
- /**
- * Coordinates are scaled by the image dimension size before being
- * used to address an image element.
- */
- HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED = 1
-
-} hsa_ext_sampler_coordinate_mode_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_sampler_coordinate_mode_t constants.
- */
-typedef uint32_t hsa_ext_sampler_coordinate_mode32_t;
-
-
-/**
- * @brief Sampler filter modes. See the <em>Filter Mode</em> section
- * in the <em>HSA Programming Reference Manual</em> for definitions
- * on each address mode. The enumeration values match the BRIG type @p
- * hsa_ext_brig_sampler_filter_t.
- */
-typedef enum {
- /**
- * Filter to the image element nearest (in Manhattan distance) to the
- * specified coordinate.
- */
- HSA_EXT_SAMPLER_FILTER_MODE_NEAREST = 0,
-
- /**
- * Filter to the image element calculated by combining the elements in a 2x2
- * square block or 2x2x2 cube block around the specified coordinate. The
- * elements are combined using linear interpolation.
- */
- HSA_EXT_SAMPLER_FILTER_MODE_LINEAR = 1
-
-} hsa_ext_sampler_filter_mode_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_sampler_filter_mode_t constants.
- */
-typedef uint32_t hsa_ext_sampler_filter_mode32_t;
-
-/**
- * @brief Implementation independent sampler descriptor.
- */
-typedef struct hsa_ext_sampler_descriptor_s {
- /**
- * Sampler coordinate mode describes the normalization of image coordinates.
- */
- hsa_ext_sampler_coordinate_mode32_t coordinate_mode;
-
- /**
- * Sampler filter type describes the type of sampling performed.
- */
- hsa_ext_sampler_filter_mode32_t filter_mode;
-
- /**
- * Sampler address mode describes the processing of out-of-range image
- * coordinates.
- */
- hsa_ext_sampler_addressing_mode32_t address_mode;
-
-} hsa_ext_sampler_descriptor_t;
-
-/**
- * @brief Create an agent specific sampler handle for a given agent
- * independent sampler descriptor and agent.
- *
- * @param[in] agent Agent to be associated with the sampler handle created.
- *
- * @param[in] sampler_descriptor Pointer to a sampler descriptor. Must not be
- * NULL.
- *
- * @param[out] sampler Memory location where the HSA runtime stores the newly
- * created sampler handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED The
- * @p agent does not have the capability to support the properties
- * specified by @p sampler_descriptor or it is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p sampler_descriptor is NULL, or
- * @p sampler is NULL.
- */
-hsa_status_t HSA_API hsa_ext_sampler_create(
- hsa_agent_t agent,
- const hsa_ext_sampler_descriptor_t *sampler_descriptor,
- hsa_ext_sampler_t *sampler);
-
-/**
- * @brief Destroy a sampler handle previously created using ::hsa_ext_sampler_create.
- *
- * @details The sampler handle should not be destroyed while there are
- * references to it queued for execution or currently being used in a
- * kernel dispatch.
- *
- * @param[in] agent Agent associated with the sampler handle.
- *
- * @param[in] sampler Sampler handle to destroy.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- */
-hsa_status_t HSA_API hsa_ext_sampler_destroy(
- hsa_agent_t agent,
- hsa_ext_sampler_t sampler);
-
-
-#define hsa_ext_images_1_00
-
-/**
- * @brief The function pointer table for the images v1.00 extension. Can be returned by ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
- */
-typedef struct hsa_ext_images_1_00_pfn_s {
-
- hsa_status_t (*hsa_ext_image_get_capability)(
- hsa_agent_t agent,
- hsa_ext_image_geometry_t geometry,
- const hsa_ext_image_format_t *image_format,
- uint32_t *capability_mask);
-
- hsa_status_t (*hsa_ext_image_data_get_info)(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_info_t *image_data_info);
-
- hsa_status_t (*hsa_ext_image_create)(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- const void *image_data,
- hsa_access_permission_t access_permission,
- hsa_ext_image_t *image);
-
- hsa_status_t (*hsa_ext_image_destroy)(
- hsa_agent_t agent,
- hsa_ext_image_t image);
-
- hsa_status_t (*hsa_ext_image_copy)(
- hsa_agent_t agent,
- hsa_ext_image_t src_image,
- const hsa_dim3_t* src_offset,
- hsa_ext_image_t dst_image,
- const hsa_dim3_t* dst_offset,
- const hsa_dim3_t* range);
-
- hsa_status_t (*hsa_ext_image_import)(
- hsa_agent_t agent,
- const void *src_memory,
- size_t src_row_pitch,
- size_t src_slice_pitch,
- hsa_ext_image_t dst_image,
- const hsa_ext_image_region_t *image_region);
-
- hsa_status_t (*hsa_ext_image_export)(
- hsa_agent_t agent,
- hsa_ext_image_t src_image,
- void *dst_memory,
- size_t dst_row_pitch,
- size_t dst_slice_pitch,
- const hsa_ext_image_region_t *image_region);
-
- hsa_status_t (*hsa_ext_image_clear)(
- hsa_agent_t agent,
- hsa_ext_image_t image,
- const void* data,
- const hsa_ext_image_region_t *image_region);
-
- hsa_status_t (*hsa_ext_sampler_create)(
- hsa_agent_t agent,
- const hsa_ext_sampler_descriptor_t *sampler_descriptor,
- hsa_ext_sampler_t *sampler);
-
- hsa_status_t (*hsa_ext_sampler_destroy)(
- hsa_agent_t agent,
- hsa_ext_sampler_t sampler);
-
-} hsa_ext_images_1_00_pfn_t;
-
-#define hsa_ext_images_1
-
-/**
- * @brief The function pointer table for the images v1 extension. Can be returned by ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
- */
-typedef struct hsa_ext_images_1_pfn_s {
-
- hsa_status_t (*hsa_ext_image_get_capability)(
- hsa_agent_t agent,
- hsa_ext_image_geometry_t geometry,
- const hsa_ext_image_format_t *image_format,
- uint32_t *capability_mask);
-
- hsa_status_t (*hsa_ext_image_data_get_info)(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_info_t *image_data_info);
-
- hsa_status_t (*hsa_ext_image_create)(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- const void *image_data,
- hsa_access_permission_t access_permission,
- hsa_ext_image_t *image);
-
- hsa_status_t (*hsa_ext_image_destroy)(
- hsa_agent_t agent,
- hsa_ext_image_t image);
-
- hsa_status_t (*hsa_ext_image_copy)(
- hsa_agent_t agent,
- hsa_ext_image_t src_image,
- const hsa_dim3_t* src_offset,
- hsa_ext_image_t dst_image,
- const hsa_dim3_t* dst_offset,
- const hsa_dim3_t* range);
-
- hsa_status_t (*hsa_ext_image_import)(
- hsa_agent_t agent,
- const void *src_memory,
- size_t src_row_pitch,
- size_t src_slice_pitch,
- hsa_ext_image_t dst_image,
- const hsa_ext_image_region_t *image_region);
-
- hsa_status_t (*hsa_ext_image_export)(
- hsa_agent_t agent,
- hsa_ext_image_t src_image,
- void *dst_memory,
- size_t dst_row_pitch,
- size_t dst_slice_pitch,
- const hsa_ext_image_region_t *image_region);
-
- hsa_status_t (*hsa_ext_image_clear)(
- hsa_agent_t agent,
- hsa_ext_image_t image,
- const void* data,
- const hsa_ext_image_region_t *image_region);
-
- hsa_status_t (*hsa_ext_sampler_create)(
- hsa_agent_t agent,
- const hsa_ext_sampler_descriptor_t *sampler_descriptor,
- hsa_ext_sampler_t *sampler);
-
- hsa_status_t (*hsa_ext_sampler_destroy)(
- hsa_agent_t agent,
- hsa_ext_sampler_t sampler);
-
- hsa_status_t (*hsa_ext_image_get_capability_with_layout)(
- hsa_agent_t agent,
- hsa_ext_image_geometry_t geometry,
- const hsa_ext_image_format_t *image_format,
- hsa_ext_image_data_layout_t image_data_layout,
- uint32_t *capability_mask);
-
- hsa_status_t (*hsa_ext_image_data_get_info_with_layout)(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_layout_t image_data_layout,
- size_t image_data_row_pitch,
- size_t image_data_slice_pitch,
- hsa_ext_image_data_info_t *image_data_info);
-
- hsa_status_t (*hsa_ext_image_create_with_layout)(
- hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- const void *image_data,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_layout_t image_data_layout,
- size_t image_data_row_pitch,
- size_t image_data_slice_pitch,
- hsa_ext_image_t *image);
-
-} hsa_ext_images_1_pfn_t;
-/** @} */
-
-#ifdef __cplusplus
-} // end extern "C" block
-#endif /*__cplusplus*/
-
-#endif
diff --git a/third_party/rocm/include/hsa/hsa_ven_amd_aqlprofile.h b/third_party/rocm/include/hsa/hsa_ven_amd_aqlprofile.h
deleted file mode 100644
index fb763c0..0000000
--- a/third_party/rocm/include/hsa/hsa_ven_amd_aqlprofile.h
+++ /dev/null
@@ -1,355 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
-#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
-
-#include <stdint.h>
-#include "hsa.h"
-
-#define HSA_AQLPROFILE_VERSION_MAJOR 2
-#define HSA_AQLPROFILE_VERSION_MINOR 0
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-////////////////////////////////////////////////////////////////////////////////
-// Library version
-uint32_t hsa_ven_amd_aqlprofile_version_major();
-uint32_t hsa_ven_amd_aqlprofile_version_minor();
-
-///////////////////////////////////////////////////////////////////////
-// Library API:
-// The library provides helper methods for instantiation of
-// the profile context object and for populating of the start
-// and stop AQL packets. The profile object contains a profiling
-// events list and needed for profiling buffers descriptors,
-// a command buffer and an output data buffer. To check if there
-// was an error the library methods return a status code. Also
-// the library provides methods for querying required buffers
-// attributes, to validate the event attributes and to get profiling
-// output data.
-//
-// Returned status:
-// hsa_status_t – HSA status codes are used from hsa.h header
-//
-// Supported profiling features:
-//
-// Supported profiling events
-typedef enum {
- HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0,
- HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1,
-} hsa_ven_amd_aqlprofile_event_type_t;
-
-// Supported performance counters (PMC) blocks
-// The block ID is the same for a block instances set, for example
-// each block instance from the TCC block set, TCC0, TCC1, …, TCCN
-// will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCKS_TCC.
-typedef enum {
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14,
- // Memory related blocks
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23,
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24,
- // System blocks
- HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25,
-
- HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
-} hsa_ven_amd_aqlprofile_block_name_t;
-
-// PMC event object structure
-// ‘counter_id’ value is specified in GFXIPs perfcounter user guides
-// which is the counters select value, “Performance Counters Selection”
-// chapter.
-typedef struct {
- hsa_ven_amd_aqlprofile_block_name_t block_name;
- uint32_t block_index;
- uint32_t counter_id;
-} hsa_ven_amd_aqlprofile_event_t;
-
-// Check if event is valid for the specific GPU
-hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
- hsa_agent_t agent, // HSA handle for the profiling GPU
- const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer on validated event
- bool* result); // [out] True if the event valid, False otherwise
-
-// Profiling parameters
-// All parameters are generic and if not applicable for a specific
-// profile configuration then error status will be returned.
-typedef enum {
- // Trace applicable parameters
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
- HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
-} hsa_ven_amd_aqlprofile_parameter_name_t;
-
-// Profile parameter object
-typedef struct {
- hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
- uint32_t value;
-} hsa_ven_amd_aqlprofile_parameter_t;
-
-//
-// Profile context object:
-// The library provides a profile object structure which contains
-// the events array, a buffer for the profiling start/stop commands
-// and a buffer for the output data.
-// The buffers are specified by the buffer descriptors and allocated
-// by the application. The buffers allocation attributes, the command
-// buffer size, the PMC output buffer size as well as profiling output
-// data can be get using the generic get profile info helper _get_info.
-//
-// Buffer descriptor
-typedef struct {
- void* ptr;
- uint32_t size;
-} hsa_ven_amd_aqlprofile_descriptor_t;
-
-// Profile context object structure, contains profiling events list and
-// needed for profiling buffers descriptors, a command buffer and
-// an output data buffer
-typedef struct {
- hsa_agent_t agent; // GFXIP handle
- hsa_ven_amd_aqlprofile_event_type_t type; // Events type
- const hsa_ven_amd_aqlprofile_event_t* events; // Events array
- uint32_t event_count; // Events count
- const hsa_ven_amd_aqlprofile_parameter_t* parameters; // Parameters array
- uint32_t parameter_count; // Parameters count
- hsa_ven_amd_aqlprofile_descriptor_t output_buffer; // Output buffer
- hsa_ven_amd_aqlprofile_descriptor_t command_buffer; // PM4 commands
-} hsa_ven_amd_aqlprofile_profile_t;
-
-//
-// AQL packets populating methods:
-// The helper methods to populate provided by the application START and
-// STOP AQL packets which the application is required to submit before and
-// after profiled GPU task packets respectively.
-//
-// AQL Vendor Specific packet which carries a PM4 command
-typedef struct {
- uint16_t header;
- uint16_t pm4_command[27];
- hsa_signal_t completion_signal;
-} hsa_ext_amd_aql_pm4_packet_t;
-
-// Method to populate the provided AQL packet with profiling start commands
-// Only 'pm4_command' fields of the packet are set and the application
-// is responsible to set Vendor Specific header type a completion signal
-hsa_status_t hsa_ven_amd_aqlprofile_start(
- hsa_ven_amd_aqlprofile_profile_t* profile, // [in/out] profile contex object
- hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet
-
-// Method to populate the provided AQL packet with profiling stop commands
-// Only 'pm4_command' fields of the packet are set and the application
-// is responsible to set Vendor Specific header type and a completion signal
-hsa_status_t hsa_ven_amd_aqlprofile_stop(
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile contex object
- hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet
-
-// Method to populate the provided AQL packet with profiling read commands
-// Only 'pm4_command' fields of the packet are set and the application
-// is responsible to set Vendor Specific header type and a completion signal
-hsa_status_t hsa_ven_amd_aqlprofile_read(
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile contex object
- hsa_ext_amd_aql_pm4_packet_t* aql_read_packet); // [out] profile stop AQL packet
-
-// Legacy devices, PM4 profiling packet size
-const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
-// Legacy devices, converting the profiling AQL packet to PM4 packet blob
-hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
- const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet
- void* data); // [out] PM4 packet blob
-
-//
-// Get profile info:
-// Generic method for getting various profile info including profile buffers
-// attributes like the command buffer size and the profiling PMC results.
-// It’s implied that all counters are 64bit values.
-//
-// Profile generic output data:
-typedef struct {
- uint32_t sample_id; // PMC sample or trace buffer index
- union {
- struct {
- hsa_ven_amd_aqlprofile_event_t event; // PMC event
- uint64_t result; // PMC result
- } pmc_data;
- hsa_ven_amd_aqlprofile_descriptor_t trace_data; // Trace output data descriptor
- };
-} hsa_ven_amd_aqlprofile_info_data_t;
-
-// ID query type
-typedef struct {
- const char* name;
- uint32_t id;
- uint32_t instance_count;
-} hsa_ven_amd_aqlprofile_id_query_t;
-
-// Profile attributes
-typedef enum {
- HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0, // get_info returns uint32_t value
- HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1, // get_info returns uint32_t value
- HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2, // get_info returns PMC uint64_t value
- // in info_data object
- HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3, // get_info returns trace buffer ptr/size
- // in info_data object
- //
- HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4, // get_info returns number of block counter
- HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5, // get_info returns block id, instances
- // by name string using _id_query_t
- //
- HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6, // get_info returns size/pointer for
- // counters enable command buffer
- HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7, // get_info returns size/pointer for
- // counters disable command buffer
-} hsa_ven_amd_aqlprofile_info_type_t;
-
-// Definition of output data iterator callback
-typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
- hsa_ven_amd_aqlprofile_info_type_t info_type, // [in] data type, PMC or trace data
- hsa_ven_amd_aqlprofile_info_data_t* info_data, // [in] info_data object
- void* callback_data); // [in/out] data passed to the callback
-
-// Method for getting the profile info
-hsa_status_t hsa_ven_amd_aqlprofile_get_info(
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
- hsa_ven_amd_aqlprofile_info_type_t attribute, // [in] requested profile attribute
- void* value); // [in/out] returned value
-
-// Method for iterating the events output data
-hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
- const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
- hsa_ven_amd_aqlprofile_data_callback_t callback, // [in] callback to iterate the output data
- void* data); // [in/out] data passed to the callback
-
-// Return error string
-hsa_status_t hsa_ven_amd_aqlprofile_error_string(
- const char** str); // [out] pointer on the error string
-
-/**
- * @brief Extension version.
- */
-#define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
-#define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"
-
-#ifdef HSA_LARGE_MODEL
-static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
-#else
-static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
-#endif
-
-/**
- * @brief Extension function table.
- */
-typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
- uint32_t (*hsa_ven_amd_aqlprofile_version_major)();
- uint32_t (*hsa_ven_amd_aqlprofile_version_minor)();
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
- const char** str);
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
- hsa_agent_t agent,
- const hsa_ven_amd_aqlprofile_event_t* event,
- bool* result);
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
- hsa_ven_amd_aqlprofile_profile_t* profile,
- hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
- const hsa_ven_amd_aqlprofile_profile_t* profile,
- hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
- const hsa_ven_amd_aqlprofile_profile_t* profile,
- hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
- const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
- void* data);
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
- const hsa_ven_amd_aqlprofile_profile_t* profile,
- hsa_ven_amd_aqlprofile_info_type_t attribute,
- void* value);
-
- hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
- const hsa_ven_amd_aqlprofile_profile_t* profile,
- hsa_ven_amd_aqlprofile_data_callback_t callback,
- void* data);
-} hsa_ven_amd_aqlprofile_1_00_pfn_t;
-
-typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-
-#endif // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
diff --git a/third_party/rocm/include/hsa/hsa_ven_amd_loader.h b/third_party/rocm/include/hsa/hsa_ven_amd_loader.h
deleted file mode 100644
index 3ce8475..0000000
--- a/third_party/rocm/include/hsa/hsa_ven_amd_loader.h
+++ /dev/null
@@ -1,589 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-// AMD Research and AMD HSA Software Development
-//
-// Advanced Micro Devices, Inc.
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimers in
-// the documentation and/or other materials provided with the distribution.
-// - Neither the names of Advanced Micro Devices, Inc,
-// nor the names of its contributors may be used to endorse or promote
-// products derived from this Software without specific prior written
-// permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// HSA AMD extension for additional loader functionality.
-
-#ifndef HSA_VEN_AMD_LOADER_H
-#define HSA_VEN_AMD_LOADER_H
-
-#include "hsa.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @brief Queries equivalent host address for given @p device_address, and
- * records it in @p host_address.
- *
- *
- * @details Contents of memory pointed to by @p host_address would be identical
- * to contents of memory pointed to by @p device_address. Only difference
- * between the two is host accessibility: @p host_address is always accessible
- * from host, @p device_address might not be accessible from host.
- *
- * If @p device_address already points to host accessible memory, then the value
- * of @p device_address is simply copied into @p host_address.
- *
- * The lifetime of @p host_address is the same as the lifetime of @p
- * device_address, and both lifetimes are limited by the lifetime of the
- * executable that is managing these addresses.
- *
- *
- * @param[in] device_address Device address to query equivalent host address
- * for.
- *
- * @param[out] host_address Pointer to application-allocated buffer to record
- * queried equivalent host address in.
- *
- *
- * @retval HSA_STATUS_SUCCESS Function is executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
- * null, or @p host_address is null.
- */
-hsa_status_t hsa_ven_amd_loader_query_host_address(
- const void *device_address,
- const void **host_address);
-
-/**
- * @brief The storage type of the code object that is backing loaded memory
- * segment.
- */
-typedef enum {
- /**
- * Loaded memory segment is not backed by any code object (anonymous), as the
- * case would be with BSS (uninitialized data).
- */
- HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
- /**
- * Loaded memory segment is backed by the code object that is stored in the
- * file.
- */
- HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
- /**
- * Loaded memory segment is backed by the code object that is stored in the
- * memory.
- */
- HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
-} hsa_ven_amd_loader_code_object_storage_type_t;
-
-/**
- * @brief Loaded memory segment descriptor.
- *
- *
- * @details Loaded memory segment descriptor describes underlying loaded memory
- * segment. Loaded memory segment is created/allocated by the executable during
- * the loading of the code object that is backing underlying memory segment.
- *
- * The lifetime of underlying memory segment is limited by the lifetime of the
- * executable that is managing underlying memory segment.
- */
-typedef struct hsa_ven_amd_loader_segment_descriptor_s {
- /**
- * Agent underlying memory segment is allocated on. If the code object that is
- * backing underlying memory segment is program code object, then 0.
- */
- hsa_agent_t agent;
- /**
- * Executable that is managing this underlying memory segment.
- */
- hsa_executable_t executable;
- /**
- * Storage type of the code object that is backing underlying memory segment.
- */
- hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
- /**
- * If the storage type of the code object that is backing underlying memory
- * segment is:
- * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
- * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
- * filepath to the code object;
- * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
- * accessible pointer to the first byte of the code object.
- */
- const void *code_object_storage_base;
- /**
- * If the storage type of the code object that is backing underlying memory
- * segment is:
- * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
- * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
- * the filepath to the code object (including null-terminating character);
- * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
- * bytes, of the memory occupied by the code object.
- */
- size_t code_object_storage_size;
- /**
- * If the storage type of the code object that is backing underlying memory
- * segment is:
- * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
- * - other, then offset, in bytes, from the beginning of the code object to
- * the first byte in the code object data is copied from.
- */
- size_t code_object_storage_offset;
- /**
- * Starting address of the underlying memory segment.
- */
- const void *segment_base;
- /**
- * Size, in bytes, of the underlying memory segment.
- */
- size_t segment_size;
-} hsa_ven_amd_loader_segment_descriptor_t;
-
-/**
- * @brief Either queries loaded memory segment descriptors, or total number of
- * loaded memory segment descriptors.
- *
- *
- * @details If @p segment_descriptors is not null and @p num_segment_descriptors
- * points to number that exactly matches total number of loaded memory segment
- * descriptors, then queries loaded memory segment descriptors, and records them
- * in @p segment_descriptors. If @p segment_descriptors is null and @p
- * num_segment_descriptors points to zero, then queries total number of loaded
- * memory segment descriptors, and records it in @p num_segment_descriptors. In
- * all other cases returns appropriate error code (see below).
- *
- * The caller of this function is responsible for the allocation/deallocation
- * and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
- *
- * The lifetime of loaded memory segments that are described by queried loaded
- * memory segment descriptors is limited by the lifetime of the executable that
- * is managing loaded memory segments.
- *
- * Queried loaded memory segment descriptors are always self-consistent: they
- * describe a complete set of loaded memory segments that are being backed by
- * fully loaded code objects that are present at the time (i.e. this function
- * is blocked until all executable manipulations are fully complete).
- *
- *
- * @param[out] segment_descriptors Pointer to application-allocated buffer to
- * record queried loaded memory segment descriptors in. Can be null if @p
- * num_segment_descriptors points to zero.
- *
- * @param[in,out] num_segment_descriptors Pointer to application-allocated
- * buffer that contains either total number of loaded memory segment descriptors
- * or zero.
- *
- *
- * @retval HSA_STATUS_SUCCESS Function is executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
- * while @p num_segment_descriptors points to non-zero number, @p
- * segment_descriptors is not null while @p num_segment_descriptors points to
- * zero, or @p num_segment_descriptors is null.
- *
- * @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
- * does not point to number that exactly matches total number of loaded memory
- * segment descriptors.
- */
-hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
- hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
- size_t *num_segment_descriptors);
-
-/**
- * @brief Obtains the handle of executable to which the device address belongs.
- *
- * @details This method should not be used to obtain executable handle by using
- * a host address. The executable returned is expected to be alive until its
- * destroyed by the user.
- *
- * @retval HSA_STATUS_SUCCESS Function is executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
- * is no exectuable found for this kernel code object.
- */
-hsa_status_t hsa_ven_amd_loader_query_executable(
- const void *device_address,
- hsa_executable_t *executable);
-
-//===----------------------------------------------------------------------===//
-
-/**
- * @brief Iterate over the loaded code objects in an executable, and invoke
- * an application-defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] callback Callback to be invoked once per loaded code object. The
- * HSA runtime passes three arguments to the callback: the executable, a
- * loaded code object, and the application data. If @p callback returns a
- * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
- * traversal stops and
- * ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
- * status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
- hsa_executable_t executable,
- hsa_status_t (*callback)(
- hsa_executable_t executable,
- hsa_loaded_code_object_t loaded_code_object,
- void *data),
- void *data);
-
-/**
- * @brief Loaded code object kind.
- */
-typedef enum {
- /**
- * Program code object.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
- /**
- * Agent code object.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
-} hsa_ven_amd_loader_loaded_code_object_kind_t;
-
-/**
- * @brief Loaded code object attributes.
- */
-typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
- /**
- * The executable in which this loaded code object is loaded. The
- * type of this attribute is ::hsa_executable_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
- /**
- * The kind of this loaded code object. The type of this attribute is
- * ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
- /**
- * The agent on which this loaded code object is loaded. The
- * value of this attribute is only defined if
- * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
- * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
- * attribute is ::hsa_agent_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
- /**
- * The storage type of the code object reader used to load the loaded code object.
- * The type of this attribute is ::uint32_t interpreted as a
- * ::hsa_ven_amd_loader_code_object_storage_type_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
- /**
- * The memory address of the first byte of the code object that was loaaded.
- * The value of this attribute is only defined if
- * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
- * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
- * attribute is ::uint64_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
- /**
- * The memory size in bytes of the code object that was loaaded.
- * The value of this attribute is only defined if
- * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
- * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
- * attribute is ::uint64_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
- /**
- * The file descriptor of the code object that was loaaded.
- * The value of this attribute is only defined if
- * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
- * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
- * attribute is ::int.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
- /**
- * The signed byte address difference of the memory address at which the code
- * object is loaded minus the virtual address specified in the code object
- * that is loaded. The value of this attribute is only defined if the
- * executable in which the code object is loaded is froozen. The type of this
- * attribute is ::int64_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
- /**
- * The base memory address at which the code object is loaded. This is the
- * base address of the allocation for the lowest addressed segment of the code
- * object that is loaded. Note that any non-loaded segments before the first
- * loaded segment are ignored. The value of this attribute is only defined if
- * the executable in which the code object is loaded is froozen. The type of
- * this attribute is ::uint64_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
- /**
- * The byte size of the loaded code objects contiguous memory allocation. The
- * value of this attribute is only defined if the executable in which the code
- * object is loaded is froozen. The type of this attribute is ::uint64_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
- /**
- * The length of the URI in bytes, not including the NUL terminator. The type
- * of this attribute is uint32_t.
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
- /**
- * The URI name from which the code object was loaded. The type of this
- * attribute is a NUL terminated \p char* with the length equal to the value
- * of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
- * The URI name syntax is defined by the following BNF syntax:
- *
- * code_object_uri ::== file_uri | memory_uri
- * file_uri ::== "file://" file_path [ range_specifier ]
- * memory_uri ::== "memory://" process_id range_specifier
- * range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
- * file_path ::== URI_ENCODED_OS_FILE_PATH
- * process_id ::== DECIMAL_NUMBER
- * number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
- *
- * ``number`` is a C integral literal where hexadecimal values are prefixed by
- * "0x" or "0X", and octal values by "0".
- *
- * ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
- * In URI encoding, every character that is not in the regular expression
- * ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
- * proceeded by "%". Directories in the path are separated by "/".
- *
- * ``offset`` is a 0-based byte offset to the start of the code object. For a
- * file URI, it is from the start of the file specified by the ``file_path``,
- * and if omitted defaults to 0. For a memory URI, it is the memory address
- * and is required.
- *
- * ``size`` is the number of bytes in the code object. For a file URI, if
- * omitted it defaults to the size of the file. It is required for a memory
- * URI.
- *
- * ``process_id`` is the identity of the process owning the memory. For Linux
- * it is the C unsigned integral decimal literal for the process ID (PID).
- *
- * For example:
- *
- * file:///dir1/dir2/file1
- * file:///dir3/dir4/file2#offset=0x2000&size=3000
- * memory://1234#offset=0x20000&size=3000
- */
- HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
-} hsa_ven_amd_loader_loaded_code_object_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given loaded code
- * object.
- *
- * @param[in] loaded_code_object Loaded code object.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * loaded code object attribute, or @p value is NULL.
- */
-hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
- hsa_loaded_code_object_t loaded_code_object,
- hsa_ven_amd_loader_loaded_code_object_info_t attribute,
- void *value);
-
-//===----------------------------------------------------------------------===//
-
-/**
- * @brief Create a code object reader to operate on a file with size and offset.
- *
- * @param[in] file File descriptor. The file must have been opened by
- * application with at least read permissions prior calling this function. The
- * file must contain a vendor-specific code object.
- *
- * The file is owned and managed by the application; the lifetime of the file
- * descriptor must exceed that of any associated code object reader.
- *
- * @param[in] size Size of the code object embedded in @p file.
- *
- * @param[in] offset 0-based offset relative to the beginning of the @p file
- * that denotes the beginning of the code object embedded within the @p file.
- *
- * @param[out] code_object_reader Memory location to store the newly created
- * code object reader handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
- * read permissions. This condition may also be reported as
- * ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
- * ::hsa_executable_load_agent_code_object function.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
- * do not form a valid code object. If file size is 0. Or offset > file size.
- * This condition may also be reported as
- * ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
- * ::hsa_executable_load_agent_code_object function.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
- */
-hsa_status_t
-hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
- hsa_file_t file,
- size_t offset,
- size_t size,
- hsa_code_object_reader_t *code_object_reader);
-
-//===----------------------------------------------------------------------===//
-
-/**
- * @brief Extension version.
- */
-#define hsa_ven_amd_loader 001002
-
-/**
- * @brief Extension function table version 1.00.
- */
-typedef struct hsa_ven_amd_loader_1_00_pfn_s {
- hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
- const void *device_address,
- const void **host_address);
-
- hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
- hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
- size_t *num_segment_descriptors);
-
- hsa_status_t (*hsa_ven_amd_loader_query_executable)(
- const void *device_address,
- hsa_executable_t *executable);
-} hsa_ven_amd_loader_1_00_pfn_t;
-
-/**
- * @brief Extension function table version 1.01.
- */
-typedef struct hsa_ven_amd_loader_1_01_pfn_s {
- hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
- const void *device_address,
- const void **host_address);
-
- hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
- hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
- size_t *num_segment_descriptors);
-
- hsa_status_t (*hsa_ven_amd_loader_query_executable)(
- const void *device_address,
- hsa_executable_t *executable);
-
- hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
- hsa_executable_t executable,
- hsa_status_t (*callback)(
- hsa_executable_t executable,
- hsa_loaded_code_object_t loaded_code_object,
- void *data),
- void *data);
-
- hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
- hsa_loaded_code_object_t loaded_code_object,
- hsa_ven_amd_loader_loaded_code_object_info_t attribute,
- void *value);
-} hsa_ven_amd_loader_1_01_pfn_t;
-
-/**
- * @brief Extension function table version 1.02.
- */
-typedef struct hsa_ven_amd_loader_1_02_pfn_s {
- hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
- const void *device_address,
- const void **host_address);
-
- hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
- hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
- size_t *num_segment_descriptors);
-
- hsa_status_t (*hsa_ven_amd_loader_query_executable)(
- const void *device_address,
- hsa_executable_t *executable);
-
- hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
- hsa_executable_t executable,
- hsa_status_t (*callback)(
- hsa_executable_t executable,
- hsa_loaded_code_object_t loaded_code_object,
- void *data),
- void *data);
-
- hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
- hsa_loaded_code_object_t loaded_code_object,
- hsa_ven_amd_loader_loaded_code_object_info_t attribute,
- void *value);
-
- hsa_status_t
- (*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
- hsa_file_t file,
- size_t offset,
- size_t size,
- hsa_code_object_reader_t *code_object_reader);
-} hsa_ven_amd_loader_1_02_pfn_t;
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* HSA_VEN_AMD_LOADER_H */
diff --git a/third_party/rocm/version.txt b/third_party/rocm/version.txt
deleted file mode 100644
index 21016b3..0000000
--- a/third_party/rocm/version.txt
+++ /dev/null
@@ -1 +0,0 @@
-4.1.1-34