Use external/local ROCm SDK (#5959)

diff --git a/build_tools/third_party/rocm/CMakeLists.txt b/build_tools/third_party/rocm/CMakeLists.txt
index 95ddb61..c2938cc 100644
--- a/build_tools/third_party/rocm/CMakeLists.txt
+++ b/build_tools/third_party/rocm/CMakeLists.txt
@@ -11,8 +11,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# Nothing to declare unless the experimental ROCm backend is being built.
+if(NOT IREE_BUILD_EXPERIMENTAL_ROCM)
+  return()
+endif()
 
-set(ROCM_HEADERS_API_ROOT "${IREE_ROOT_DIR}/third_party/rocm/include")
+# Honor a ROCM_HEADERS_API_ROOT value that is already set; otherwise fall
+# back to /opt/rocm/include.
+if(NOT ROCM_HEADERS_API_ROOT)
+  set(ROCM_HEADERS_API_ROOT "/opt/rocm/include")
+endif()
+
+# Report a configure error when the header directory does not exist.
+if(EXISTS "${ROCM_HEADERS_API_ROOT}")
+  message(STATUS "ROCm Header Path: ${ROCM_HEADERS_API_ROOT}")
+else()
+  message(SEND_ERROR "Could not locate ROCm: ${ROCM_HEADERS_API_ROOT}")
+endif()
 
 external_cc_library(
   PACKAGE
@@ -27,3 +43,5 @@
     ${ROCM_HEADERS_API_ROOT}
 )
 
+# Remove the variable from the current scope now that external_cc_library() has consumed it.
+unset(ROCM_HEADERS_API_ROOT)
diff --git a/third_party/rocm/LICENSE b/third_party/rocm/LICENSE
deleted file mode 100644
index 7c79cca..0000000
--- a/third_party/rocm/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-with the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimers.
-
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimers in the
-      documentation and/or other materials provided with the distribution.
-
-    * Neither the names of Advanced Micro Devices, Inc. nor the names of its
-      contributors may be used to endorse or promote products derived from
-      this Software without specific prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
-THE SOFTWARE.
\ No newline at end of file
diff --git a/third_party/rocm/README.txt b/third_party/rocm/README.txt
deleted file mode 100644
index 202619d..0000000
--- a/third_party/rocm/README.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-This folder contains a subset of ROCM SDK headers needed to build Experimental IREE ROCM Backend.
-It will also contains amdgcn bc files llvm module used to import __oc* function
-during ROCm HSACO(code object) kernel compilation.
diff --git a/third_party/rocm/UPDATING.md b/third_party/rocm/UPDATING.md
deleted file mode 100644
index 2cab28e..0000000
--- a/third_party/rocm/UPDATING.md
+++ /dev/null
@@ -1,15 +0,0 @@
-Those headers come from ROCM SDK.
-
-Currently updates are not supported by ROCm, so we need to uninstall and reinstall ROCm if we want to update
-To update, install ROCM SDK locally:
-```
-sudo apt autoremove rocm-opencl rocm-dkms rocm-dev rocm-utils && sudo reboot
-sudo apt-get install rocm-dkms
-```
-
-Copy HIP and HSA headers, version.txt and libdevice.10.bc:
-```
-cp -RL /opt/rocm/include/hip ./include/
-cp -RL /opt/rocm/include/hsa ./include/
-cp /opt/rocm/.info/version version.txt
-```
diff --git a/third_party/rocm/include/hip/channel_descriptor.h b/third_party/rocm/include/hip/channel_descriptor.h
deleted file mode 100644
index 842701b..0000000
--- a/third_party/rocm/include/hip/channel_descriptor.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H
-#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H
-
-// Some standard header files, these are included by hc.hpp and so want to make them avail on both
-// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
-// on NVCC path:
-
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/channel_descriptor.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/channel_descriptor.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/device_functions.h b/third_party/rocm/include/hip/device_functions.h
deleted file mode 100644
index f6059f2..0000000
--- a/third_party/rocm/include/hip/device_functions.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/device_functions.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <device_functions.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/driver_types.h b/third_party/rocm/include/hip/driver_types.h
deleted file mode 100644
index d428ec7..0000000
--- a/third_party/rocm/include/hip/driver_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H
-#define HIP_INCLUDE_HIP_DRIVER_TYPES_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/driver_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "driver_types.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/channel_descriptor.h b/third_party/rocm/include/hip/hcc_detail/channel_descriptor.h
deleted file mode 100644
index 417451f..0000000
--- a/third_party/rocm/include/hip/hcc_detail/channel_descriptor.h
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_CHANNEL_DESCRIPTOR_H
-
-#include <hip/hip_common.h>
-#include <hip/hcc_detail/driver_types.h>
-#include <hip/hcc_detail/hip_vector_types.h>
-
-#ifdef __cplusplus
-
-#if __HIP_ROCclr__
-extern "C" {
-#endif
-HIP_PUBLIC_API
-hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f);
-#if __HIP_ROCclr__
-}
-#endif
-
-static inline hipChannelFormatDesc hipCreateChannelDescHalf() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-static inline hipChannelFormatDesc hipCreateChannelDescHalf1() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-static inline hipChannelFormatDesc hipCreateChannelDescHalf2() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-template <typename T>
-static inline hipChannelFormatDesc hipCreateChannelDesc() {
-    return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char>() {
-    int e = (int)sizeof(char) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed char>() {
-    int e = (int)sizeof(signed char) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned char>() {
-    int e = (int)sizeof(unsigned char) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar1>() {
-    int e = (int)sizeof(unsigned char) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char1>() {
-    int e = (int)sizeof(signed char) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar2>() {
-    int e = (int)sizeof(unsigned char) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char2>() {
-    int e = (int)sizeof(signed char) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__  // vector3 is the same as vector4
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar3>() {
-    int e = (int)sizeof(unsigned char) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char3>() {
-    int e = (int)sizeof(signed char) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uchar4>() {
-    int e = (int)sizeof(unsigned char) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<char4>() {
-    int e = (int)sizeof(signed char) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned short>() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed short>() {
-    int e = (int)sizeof(signed short) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort1>() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short1>() {
-    int e = (int)sizeof(signed short) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort2>() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short2>() {
-    int e = (int)sizeof(signed short) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort3>() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short3>() {
-    int e = (int)sizeof(signed short) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ushort4>() {
-    int e = (int)sizeof(unsigned short) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<short4>() {
-    int e = (int)sizeof(signed short) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned int>() {
-    int e = (int)sizeof(unsigned int) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed int>() {
-    int e = (int)sizeof(signed int) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint1>() {
-    int e = (int)sizeof(unsigned int) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int1>() {
-    int e = (int)sizeof(signed int) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint2>() {
-    int e = (int)sizeof(unsigned int) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int2>() {
-    int e = (int)sizeof(signed int) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint3>() {
-    int e = (int)sizeof(unsigned int) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int3>() {
-    int e = (int)sizeof(signed int) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<uint4>() {
-    int e = (int)sizeof(unsigned int) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<int4>() {
-    int e = (int)sizeof(signed int) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float>() {
-    int e = (int)sizeof(float) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float1>() {
-    int e = (int)sizeof(float) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float2>() {
-    int e = (int)sizeof(float) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float3>() {
-    int e = (int)sizeof(float) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindFloat);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<float4>() {
-    int e = (int)sizeof(float) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<unsigned long>() {
-    int e = (int)sizeof(unsigned long) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<signed long>() {
-    int e = (int)sizeof(signed long) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong1>() {
-    int e = (int)sizeof(unsigned long) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long1>() {
-    int e = (int)sizeof(signed long) * 8;
-    return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong2>() {
-    int e = (int)sizeof(unsigned long) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long2>() {
-    int e = (int)sizeof(signed long) * 8;
-    return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned);
-}
-
-#ifndef __GNUC__
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong3>() {
-    int e = (int)sizeof(unsigned long) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long3>() {
-    int e = (int)sizeof(signed long) * 8;
-    return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned);
-}
-#endif
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<ulong4>() {
-    int e = (int)sizeof(unsigned long) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned);
-}
-
-template <>
-inline hipChannelFormatDesc hipCreateChannelDesc<long4>() {
-    int e = (int)sizeof(signed long) * 8;
-    return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned);
-}
-
-#else
-
-struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
-                                                 enum hipChannelFormatKind f);
-
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/concepts.hpp b/third_party/rocm/include/hip/hcc_detail/concepts.hpp
deleted file mode 100644
index 373cefb..0000000
--- a/third_party/rocm/include/hip/hcc_detail/concepts.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-namespace hip_impl  // Documentation only.
-{
-#define requires(...)
-
-#define FunctionalProcedure typename
-}  // namespace hip_impl
diff --git a/third_party/rocm/include/hip/hcc_detail/cuda/cuda.h b/third_party/rocm/include/hip/hcc_detail/cuda/cuda.h
deleted file mode 100644
index 8b13789..0000000
--- a/third_party/rocm/include/hip/hcc_detail/cuda/cuda.h
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/third_party/rocm/include/hip/hcc_detail/cuda/math_functions.h b/third_party/rocm/include/hip/hcc_detail/cuda/math_functions.h
deleted file mode 100644
index 8b13789..0000000
--- a/third_party/rocm/include/hip/hcc_detail/cuda/math_functions.h
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/third_party/rocm/include/hip/hcc_detail/device_functions.h b/third_party/rocm/include/hip/hcc_detail/device_functions.h
deleted file mode 100644
index 515b4cc..0000000
--- a/third_party/rocm/include/hip/hcc_detail/device_functions.h
+++ /dev/null
@@ -1,1431 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H
-
-#include "host_defines.h"
-#include "math_fwd.h"
-
-#include <hip/hip_runtime_api.h>
-#include <stddef.h>
-
-
-#include <hip/hip_vector_types.h>
-#include <hip/hcc_detail/device_library_decls.h>
-#include <hip/hcc_detail/llvm_intrinsics.h>
-
-#if __HIP_CLANG_ONLY__ && __HIP_ROCclr__ && !_WIN32
-extern "C" __device__ int printf(const char *fmt, ...);
-#else
-#if HC_FEATURE_PRINTF
-template <typename... All>
-static inline __device__ void printf(const char* format, All... all) {
-    hc::printf(format, all...);
-}
-#else
-template <typename... All>
-static inline __device__ void printf(const char* format, All... all) {}
-#endif // HC_FEATURE_PRINTF
-#endif // __HIP_CLANG_ONLY__ && __HIP_ROCclr__
-
-/*
-Integer Intrinsics
-*/
-
-// integer intrinsic function __poc __clz __ffs __brev
-__device__ static inline unsigned int __popc(unsigned int input) {
-    return __builtin_popcount(input);
-}
-__device__ static inline unsigned int __popcll(unsigned long long int input) {
-    return __builtin_popcountll(input);
-}
-
-__device__ static inline int __clz(int input) {
-    return __ockl_clz_u32((uint)input);
-}
-
-__device__ static inline int __clzll(long long int input) {
-    return __ockl_clz_u64((ullong)input);
-}
-
-__device__ static inline unsigned int __ffs(unsigned int input) {
-    return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1;
-}
-
-__device__ static inline unsigned int __ffsll(unsigned long long int input) {
-    return ( input == 0 ? -1 : __builtin_ctzll(input) ) + 1;
-}
-
-__device__ static inline unsigned int __ffs(int input) {
-    return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1;
-}
-
-__device__ static inline unsigned int __ffsll(long long int input) {
-    return ( input == 0 ? -1 : __builtin_ctzll(input) ) + 1;
-}
-
-__device__ static inline unsigned int __brev(unsigned int input) {
-    return __builtin_bitreverse32(input);
-}
-
-__device__ static inline unsigned long long int __brevll(unsigned long long int input) {
-    return __builtin_bitreverse64(input);
-}
-
-__device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) {
-    return input == 0 ? -1 : __builtin_ctzl(input);
-}
-
-__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) {
-    uint32_t offset = src1 & 31;
-    uint32_t width = src2 & 31;
-    return width == 0 ? 0 : (src0 << (32 - offset - width)) >> (32 - width);
-}
-
-__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) {
-    uint64_t offset = src1 & 63;
-    uint64_t width = src2 & 63;
-    return width == 0 ? 0 : (src0 << (64 - offset - width)) >> (64 - width);
-}
-
-__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) {
-    uint32_t offset = src2 & 31;
-    uint32_t width = src3 & 31;
-    uint32_t mask = (1 << width) - 1;
-    return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
-}
-
-__device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) {
-    uint64_t offset = src2 & 63;
-    uint64_t width = src3 & 63;
-    uint64_t mask = (1ULL << width) - 1;
-    return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
-}
-
-__device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s);
-__device__ static unsigned int __hadd(int x, int y);
-__device__ static int __mul24(int x, int y);
-__device__ static long long int __mul64hi(long long int x, long long int y);
-__device__ static int __mulhi(int x, int y);
-__device__ static int __rhadd(int x, int y);
-__device__ static unsigned int __sad(int x, int y,unsigned int z);
-__device__ static unsigned int __uhadd(unsigned int x, unsigned int y);
-__device__ static int __umul24(unsigned int x, unsigned int y);
-__device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y);
-__device__ static unsigned int __umulhi(unsigned int x, unsigned int y);
-__device__ static unsigned int __urhadd(unsigned int x, unsigned int y);
-__device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z);
-
-struct ucharHolder {
-    union {
-        unsigned char c[4];
-        unsigned int ui;
-    };
-} __attribute__((aligned(4)));
-
-struct uchar2Holder {
-    union {
-        unsigned int ui[2];
-        unsigned char c[8];
-    };
-} __attribute__((aligned(8)));
-
-__device__
-static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) {
-    struct uchar2Holder cHoldVal;
-    struct ucharHolder cHoldKey;
-    struct ucharHolder cHoldOut;
-    cHoldKey.ui = s;
-    cHoldVal.ui[0] = x;
-    cHoldVal.ui[1] = y;
-    cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]];
-    cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]];
-    cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]];
-    cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]];
-    return cHoldOut.ui;
-}
-
-__device__ static inline unsigned int __hadd(int x, int y) {
-    int z = x + y;
-    int sign = z & 0x8000000;
-    int value = z & 0x7FFFFFFF;
-    return ((value) >> 1 || sign);
-}
-
-__device__ static inline int __mul24(int x, int y) {
-    return __ockl_mul24_i32(x, y);
-}
-
-__device__ static inline long long __mul64hi(long long int x, long long int y) {
-    ulong x0 = (ulong)x & 0xffffffffUL;
-    long x1 = x >> 32;
-    ulong y0 = (ulong)y & 0xffffffffUL;
-    long y1 = y >> 32;
-    ulong z0 = x0*y0;
-    long t = x1*y0 + (z0 >> 32);
-    long z1 = t & 0xffffffffL;
-    long z2 = t >> 32;
-    z1 = x0*y1 + z1;
-    return x1*y1 + z2 + (z1 >> 32);
-}
-
-__device__ static inline int __mulhi(int x, int y) {
-    return __ockl_mul_hi_i32(x, y);
-}
-
-__device__ static inline int __rhadd(int x, int y) {
-    int z = x + y + 1;
-    int sign = z & 0x8000000;
-    int value = z & 0x7FFFFFFF;
-    return ((value) >> 1 || sign);
-}
-__device__ static inline unsigned int __sad(int x, int y, unsigned int z) {
-    return x > y ? x - y + z : y - x + z;
-}
-__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) {
-    return (x + y) >> 1;
-}
-__device__ static inline int __umul24(unsigned int x, unsigned int y) {
-    return __ockl_mul24_u32(x, y);
-}
-
-__device__
-static inline unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) {
-    ulong x0 = x & 0xffffffffUL;
-    ulong x1 = x >> 32;
-    ulong y0 = y & 0xffffffffUL;
-    ulong y1 = y >> 32;
-    ulong z0 = x0*y0;
-    ulong t = x1*y0 + (z0 >> 32);
-    ulong z1 = t & 0xffffffffUL;
-    ulong z2 = t >> 32;
-    z1 = x0*y1 + z1;
-    return x1*y1 + z2 + (z1 >> 32);
-}
-
-__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) {
-    return __ockl_mul_hi_u32(x, y);
-}
-__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) {
-    return (x + y + 1) >> 1;
-}
-__device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) {
-    return __ockl_sadd_u32(x, y, z);
-}
-
-__device__ static inline unsigned int __lane_id() {
-    return  __builtin_amdgcn_mbcnt_hi(
-        -1, __builtin_amdgcn_mbcnt_lo(-1, 0));
-}
-
-__device__
-static inline unsigned int __mbcnt_lo(unsigned int x, unsigned int y) {return __builtin_amdgcn_mbcnt_lo(x,y);};
-
-__device__
-static inline unsigned int __mbcnt_hi(unsigned int x, unsigned int y) {return __builtin_amdgcn_mbcnt_hi(x,y);};
-
-/*
-HIP specific device functions
-*/
-
-__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
-    union { int i; unsigned u; float f; } tmp; tmp.u = src;
-    tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
-    return tmp.u;
-}
-
-__device__ static inline float __hip_ds_bpermutef(int index, float src) {
-    union { int i; unsigned u; float f; } tmp; tmp.f = src;
-    tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
-    return tmp.f;
-}
-
-__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
-    union { int i; unsigned u; float f; } tmp; tmp.u = src;
-    tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
-    return tmp.u;
-}
-
-__device__ static inline float __hip_ds_permutef(int index, float src) {
-    union { int i; unsigned u; float f; } tmp; tmp.u = src;
-    tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
-    return tmp.u;
-}
-
-#define __hip_ds_swizzle(src, pattern)  __hip_ds_swizzle_N<(pattern)>((src))
-#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
-
-template <int pattern>
-__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
-    union { int i; unsigned u; float f; } tmp; tmp.u = src;
-#if defined(__HCC__)
-    tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
-#else
-    tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
-#endif
-    return tmp.u;
-}
-
-template <int pattern>
-__device__ static inline float __hip_ds_swizzlef_N(float src) {
-    union { int i; unsigned u; float f; } tmp; tmp.f = src;
-#if defined(__HCC__)
-    tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
-#else
-    tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
-#endif
-    return tmp.f;
-}
-
-#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
-  __hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
-
-template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
-__device__ static inline int __hip_move_dpp_N(int src) {
-    return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask,
-                                    bound_ctrl);
-}
-
-// FIXME: Remove the following workaround once the clang change is released.
-// This is for backward compatibility with older clang which does not define
-// __AMDGCN_WAVEFRONT_SIZE. It does not consider -mwavefrontsize64.
-#ifndef __AMDGCN_WAVEFRONT_SIZE
-#if __gfx1010__ || __gfx1011__ || __gfx1012__ || __gfx1030__ || __gfx1031__
-#define __AMDGCN_WAVEFRONT_SIZE 32
-#else
-#define __AMDGCN_WAVEFRONT_SIZE 64
-#endif
-#endif
-static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
-
-__device__
-inline
-int __shfl(int var, int src_lane, int width = warpSize) {
-    int self = __lane_id();
-    int index = src_lane + (self & ~(width-1));
-    return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) {
-     union { int i; unsigned u; float f; } tmp; tmp.u = var;
-    tmp.i = __shfl(tmp.i, src_lane, width);
-    return tmp.u;
-}
-__device__
-inline
-float __shfl(float var, int src_lane, int width = warpSize) {
-    union { int i; unsigned u; float f; } tmp; tmp.f = var;
-    tmp.i = __shfl(tmp.i, src_lane, width);
-    return tmp.f;
-}
-__device__
-inline
-double __shfl(double var, int src_lane, int width = warpSize) {
-    static_assert(sizeof(double) == 2 * sizeof(int), "");
-    static_assert(sizeof(double) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl(tmp[0], src_lane, width);
-    tmp[1] = __shfl(tmp[1], src_lane, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    double tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-__device__
-inline
-long __shfl(long var, int src_lane, int width = warpSize)
-{
-    #ifndef _MSC_VER
-    static_assert(sizeof(long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl(tmp[0], src_lane, width);
-    tmp[1] = __shfl(tmp[1], src_lane, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(long) == sizeof(int), "");
-    return static_cast<long>(__shfl(static_cast<int>(var), src_lane, width));
-    #endif
-}
-__device__
-inline
-unsigned long __shfl(unsigned long var, int src_lane, int width = warpSize) {
-    #ifndef _MSC_VER
-    static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl(tmp[0], src_lane, width);
-    tmp[1] = __shfl(tmp[1], src_lane, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-    return static_cast<unsigned long>(__shfl(static_cast<unsigned int>(var), src_lane, width));
-    #endif
-}
-__device__
-inline
-long long __shfl(long long var, int src_lane, int width = warpSize)
-{
-    static_assert(sizeof(long long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long long) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl(tmp[0], src_lane, width);
-    tmp[1] = __shfl(tmp[1], src_lane, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-__device__
-inline
-unsigned long long __shfl(unsigned long long var, int src_lane, int width = warpSize) {
-    static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl(tmp[0], src_lane, width);
-    tmp[1] = __shfl(tmp[1], src_lane, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-
-__device__
-inline
-int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) {
-    int self = __lane_id();
-    int index = self - lane_delta;
-    index = (index < (self & ~(width-1)))?self:index;
-    return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) {
-    union { int i; unsigned u; float f; } tmp; tmp.u = var;
-    tmp.i = __shfl_up(tmp.i, lane_delta, width);
-    return tmp.u;
-}
-__device__
-inline
-float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) {
-    union { int i; unsigned u; float f; } tmp; tmp.f = var;
-    tmp.i = __shfl_up(tmp.i, lane_delta, width);
-    return tmp.f;
-}
-__device__
-inline
-double __shfl_up(double var, unsigned int lane_delta, int width = warpSize) {
-    static_assert(sizeof(double) == 2 * sizeof(int), "");
-    static_assert(sizeof(double) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_up(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    double tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-__device__
-inline
-long __shfl_up(long var, unsigned int lane_delta, int width = warpSize)
-{
-    #ifndef _MSC_VER
-    static_assert(sizeof(long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_up(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(long) == sizeof(int), "");
-    return static_cast<long>(__shfl_up(static_cast<int>(var), lane_delta, width));
-    #endif
-}
-
-__device__
-inline
-unsigned long __shfl_up(unsigned long var, unsigned int lane_delta, int width = warpSize)
-{
-    #ifndef _MSC_VER
-    static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_up(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-    return static_cast<unsigned long>(__shfl_up(static_cast<unsigned int>(var), lane_delta, width));
-    #endif
-}
-
-__device__
-inline
-long long __shfl_up(long long var, unsigned int lane_delta, int width = warpSize)
-{
-    static_assert(sizeof(long long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long long) == sizeof(uint64_t), "");
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_up(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-
-__device__
-inline
-unsigned long long __shfl_up(unsigned long long var, unsigned int lane_delta, int width = warpSize)
-{
-    static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_up(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_up(tmp[1], lane_delta, width);
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-
-__device__
-inline
-int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) {
-    int self = __lane_id();
-    int index = self + lane_delta;
-    index = (int)((self&(width-1))+lane_delta) >= width?self:index;
-    return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) {
-    union { int i; unsigned u; float f; } tmp; tmp.u = var;
-    tmp.i = __shfl_down(tmp.i, lane_delta, width);
-    return tmp.u;
-}
-__device__
-inline
-float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) {
-    union { int i; unsigned u; float f; } tmp; tmp.f = var;
-    tmp.i = __shfl_down(tmp.i, lane_delta, width);
-    return tmp.f;
-}
-__device__
-inline
-double __shfl_down(double var, unsigned int lane_delta, int width = warpSize) {
-    static_assert(sizeof(double) == 2 * sizeof(int), "");
-    static_assert(sizeof(double) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_down(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    double tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-__device__
-inline
-long __shfl_down(long var, unsigned int lane_delta, int width = warpSize)
-{
-    #ifndef _MSC_VER
-    static_assert(sizeof(long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_down(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(long) == sizeof(int), "");
-    return static_cast<long>(__shfl_down(static_cast<int>(var), lane_delta, width));
-    #endif
-}
-__device__
-inline
-unsigned long __shfl_down(unsigned long var, unsigned int lane_delta, int width = warpSize)
-{
-    #ifndef _MSC_VER
-    static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_down(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-    return static_cast<unsigned long>(__shfl_down(static_cast<unsigned int>(var), lane_delta, width));
-    #endif
-}
-__device__
-inline
-long long __shfl_down(long long var, unsigned int lane_delta, int width = warpSize)
-{
-    static_assert(sizeof(long long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long long) == sizeof(uint64_t), "");
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_down(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-__device__
-inline
-unsigned long long __shfl_down(unsigned long long var, unsigned int lane_delta, int width = warpSize)
-{
-    static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_down(tmp[0], lane_delta, width);
-    tmp[1] = __shfl_down(tmp[1], lane_delta, width);
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-
-__device__
-inline
-int __shfl_xor(int var, int lane_mask, int width = warpSize) {
-    int self = __lane_id();
-    int index = self^lane_mask;
-    index = index >= ((self+width)&~(width-1))?self:index;
-    return __builtin_amdgcn_ds_bpermute(index<<2, var);
-}
-__device__
-inline
-unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) {
-    union { int i; unsigned u; float f; } tmp; tmp.u = var;
-    tmp.i = __shfl_xor(tmp.i, lane_mask, width);
-    return tmp.u;
-}
-__device__
-inline
-float __shfl_xor(float var, int lane_mask, int width = warpSize) {
-    union { int i; unsigned u; float f; } tmp; tmp.f = var;
-    tmp.i = __shfl_xor(tmp.i, lane_mask, width);
-    return tmp.f;
-}
-__device__
-inline
-double __shfl_xor(double var, int lane_mask, int width = warpSize) {
-    static_assert(sizeof(double) == 2 * sizeof(int), "");
-    static_assert(sizeof(double) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
-    tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    double tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-__device__
-inline
-long __shfl_xor(long var, int lane_mask, int width = warpSize)
-{
-    #ifndef _MSC_VER
-    static_assert(sizeof(long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long) == sizeof(uint64_t), "");
-
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
-    tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(long) == sizeof(int), "");
-    return static_cast<long>(__shfl_xor(static_cast<int>(var), lane_mask, width));
-    #endif
-}
-__device__
-inline
-unsigned long __shfl_xor(unsigned long var, int lane_mask, int width = warpSize)
-{
-    #ifndef _MSC_VER
-    static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
-    tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-    #else
-    static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-    return static_cast<unsigned long>(__shfl_xor(static_cast<unsigned int>(var), lane_mask, width));
-    #endif
-}
-__device__
-inline
-long long __shfl_xor(long long var, int lane_mask, int width = warpSize)
-{
-    static_assert(sizeof(long long) == 2 * sizeof(int), "");
-    static_assert(sizeof(long long) == sizeof(uint64_t), "");
-    int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
-    tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-__device__
-inline
-unsigned long long __shfl_xor(unsigned long long var, int lane_mask, int width = warpSize)
-{
-    static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
-    static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-    unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp));
-    tmp[0] = __shfl_xor(tmp[0], lane_mask, width);
-    tmp[1] = __shfl_xor(tmp[1], lane_mask, width);
-    uint64_t tmp0 = (static_cast<uint64_t>(tmp[1]) << 32ull) | static_cast<uint32_t>(tmp[0]);
-    unsigned long long tmp1;  __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-    return tmp1;
-}
-#define MASK1 0x00ff00ff
-#define MASK2 0xff00ff00
-
-__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) {
-    char4 out;
-    unsigned one1 = in1.w & MASK1;
-    unsigned one2 = in2.w & MASK1;
-    out.w = (one1 + one2) & MASK1;
-    one1 = in1.w & MASK2;
-    one2 = in2.w & MASK2;
-    out.w = out.w | ((one1 + one2) & MASK2);
-    return out;
-}
-
-__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
-    char4 out;
-    unsigned one1 = in1.w & MASK1;
-    unsigned one2 = in2.w & MASK1;
-    out.w = (one1 - one2) & MASK1;
-    one1 = in1.w & MASK2;
-    one2 = in2.w & MASK2;
-    out.w = out.w | ((one1 - one2) & MASK2);
-    return out;
-}
-
-__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
-    char4 out;
-    unsigned one1 = in1.w & MASK1;
-    unsigned one2 = in2.w & MASK1;
-    out.w = (one1 * one2) & MASK1;
-    one1 = in1.w & MASK2;
-    one2 = in2.w & MASK2;
-    out.w = out.w | ((one1 * one2) & MASK2);
-    return out;
-}
-
-/*
- * Rounding modes are not yet supported in HIP
- * TODO: Conversion functions are not correct, need to fix when BE is ready
-*/
-
-__device__ static inline float __double2float_rd(double x) { return (double)x; }
-__device__ static inline float __double2float_rn(double x) { return (double)x; }
-__device__ static inline float __double2float_ru(double x) { return (double)x; }
-__device__ static inline float __double2float_rz(double x) { return (double)x; }
-
-__device__ static inline int __double2hiint(double x) {
-    static_assert(sizeof(double) == 2 * sizeof(int), "");
-
-    int tmp[2];
-    __builtin_memcpy(tmp, &x, sizeof(tmp));
-
-    return tmp[1];
-}
-__device__ static inline int __double2loint(double x) {
-    static_assert(sizeof(double) == 2 * sizeof(int), "");
-
-    int tmp[2];
-    __builtin_memcpy(tmp, &x, sizeof(tmp));
-
-    return tmp[0];
-}
-
-__device__ static inline int __double2int_rd(double x) { return (int)x; }
-__device__ static inline int __double2int_rn(double x) { return (int)x; }
-__device__ static inline int __double2int_ru(double x) { return (int)x; }
-__device__ static inline int __double2int_rz(double x) { return (int)x; }
-
-__device__ static inline long long int __double2ll_rd(double x) { return (long long int)x; }
-__device__ static inline long long int __double2ll_rn(double x) { return (long long int)x; }
-__device__ static inline long long int __double2ll_ru(double x) { return (long long int)x; }
-__device__ static inline long long int __double2ll_rz(double x) { return (long long int)x; }
-
-__device__ static inline unsigned int __double2uint_rd(double x) { return (unsigned int)x; }
-__device__ static inline unsigned int __double2uint_rn(double x) { return (unsigned int)x; }
-__device__ static inline unsigned int __double2uint_ru(double x) { return (unsigned int)x; }
-__device__ static inline unsigned int __double2uint_rz(double x) { return (unsigned int)x; }
-
-__device__ static inline unsigned long long int __double2ull_rd(double x) {
-    return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __double2ull_rn(double x) {
-    return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __double2ull_ru(double x) {
-    return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __double2ull_rz(double x) {
-    return (unsigned long long int)x;
-}
-
-__device__ static inline long long int __double_as_longlong(double x) {
-    static_assert(sizeof(long long) == sizeof(double), "");
-
-    long long tmp;
-    __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
-    return tmp;
-}
-
-/*
-__device__ unsigned short __float2half_rn(float x);
-__device__ float __half2float(unsigned short);
-
-The above device function are not a valid .
-Use
-__device__ __half __float2half_rn(float x);
-__device__ float __half2float(__half);
-from hip_fp16.h
-
-CUDA implements half as unsigned short whereas, HIP doesn't.
-
-*/
-
-__device__ static inline int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); }
-__device__ static inline int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); }
-__device__ static inline int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); }
-__device__ static inline int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); }
-
-__device__ static inline long long int __float2ll_rd(float x) { return (long long int)x; }
-__device__ static inline long long int __float2ll_rn(float x) { return (long long int)x; }
-__device__ static inline long long int __float2ll_ru(float x) { return (long long int)x; }
-__device__ static inline long long int __float2ll_rz(float x) { return (long long int)x; }
-
-__device__ static inline unsigned int __float2uint_rd(float x) { return (unsigned int)x; }
-__device__ static inline unsigned int __float2uint_rn(float x) { return (unsigned int)x; }
-__device__ static inline unsigned int __float2uint_ru(float x) { return (unsigned int)x; }
-__device__ static inline unsigned int __float2uint_rz(float x) { return (unsigned int)x; }
-
-__device__ static inline unsigned long long int __float2ull_rd(float x) {
-    return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __float2ull_rn(float x) {
-    return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __float2ull_ru(float x) {
-    return (unsigned long long int)x;
-}
-__device__ static inline unsigned long long int __float2ull_rz(float x) {
-    return (unsigned long long int)x;
-}
-
-__device__ static inline int __float_as_int(float x) {
-    static_assert(sizeof(int) == sizeof(float), "");
-
-    int tmp;
-    __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
-    return tmp;
-}
-
-__device__ static inline unsigned int __float_as_uint(float x) {
-    static_assert(sizeof(unsigned int) == sizeof(float), "");
-
-    unsigned int tmp;
-    __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
-    return tmp;
-}
-
-__device__ static inline double __hiloint2double(int hi, int lo) {
-    static_assert(sizeof(double) == sizeof(uint64_t), "");
-
-    uint64_t tmp0 = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-    double tmp1;
-    __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
-
-    return tmp1;
-}
-
-__device__ static inline double __int2double_rn(int x) { return (double)x; }
-
-__device__ static inline float __int2float_rd(int x) { return (float)x; }
-__device__ static inline float __int2float_rn(int x) { return (float)x; }
-__device__ static inline float __int2float_ru(int x) { return (float)x; }
-__device__ static inline float __int2float_rz(int x) { return (float)x; }
-
-__device__ static inline float __int_as_float(int x) {
-    static_assert(sizeof(float) == sizeof(int), "");
-
-    float tmp;
-    __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
-    return tmp;
-}
-
-__device__ static inline double __ll2double_rd(long long int x) { return (double)x; }
-__device__ static inline double __ll2double_rn(long long int x) { return (double)x; }
-__device__ static inline double __ll2double_ru(long long int x) { return (double)x; }
-__device__ static inline double __ll2double_rz(long long int x) { return (double)x; }
-
-__device__ static inline float __ll2float_rd(long long int x) { return (float)x; }
-__device__ static inline float __ll2float_rn(long long int x) { return (float)x; }
-__device__ static inline float __ll2float_ru(long long int x) { return (float)x; }
-__device__ static inline float __ll2float_rz(long long int x) { return (float)x; }
-
-__device__ static inline double __longlong_as_double(long long int x) {
-    static_assert(sizeof(double) == sizeof(long long), "");
-
-    double tmp;
-    __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
-    return tmp;
-}
-
-__device__ static inline double __uint2double_rn(int x) { return (double)x; }
-
-__device__ static inline float __uint2float_rd(unsigned int x) { return (float)x; }
-__device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; }
-__device__ static inline float __uint2float_ru(unsigned int x) { return (float)x; }
-__device__ static inline float __uint2float_rz(unsigned int x) { return (float)x; }
-
-__device__ static inline float __uint_as_float(unsigned int x) {
-   static_assert(sizeof(float) == sizeof(unsigned int), "");
-
-    float tmp;
-    __builtin_memcpy(&tmp, &x, sizeof(tmp));
-
-    return tmp;
-}
-
-__device__ static inline double __ull2double_rd(unsigned long long int x) { return (double)x; }
-__device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; }
-__device__ static inline double __ull2double_ru(unsigned long long int x) { return (double)x; }
-__device__ static inline double __ull2double_rz(unsigned long long int x) { return (double)x; }
-
-__device__ static inline float __ull2float_rd(unsigned long long int x) { return (float)x; }
-__device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; }
-__device__ static inline float __ull2float_ru(unsigned long long int x) { return (float)x; }
-__device__ static inline float __ull2float_rz(unsigned long long int x) { return (float)x; }
-
-#if defined(__HCC__)
-#define __HCC_OR_HIP_CLANG__ 1
-#elif defined(__clang__) && defined(__HIP__)
-#define __HCC_OR_HIP_CLANG__ 1
-#else
-#define __HCC_OR_HIP_CLANG__ 0
-#endif
-
-#if __HCC_OR_HIP_CLANG__
-
-// Clock functions
-__device__ long long int __clock64();
-__device__ long long int __clock();
-__device__ long long int clock64();
-__device__ long long int clock();
-// hip.amdgcn.bc - named sync
-__device__ void __named_sync(int a, int b);
-
-#ifdef __HIP_DEVICE_COMPILE__
-
-// Clock functions
-#if __HCC__
-extern "C" uint64_t __clock_u64()  __HC__;
-#endif
-
-__device__
-inline  __attribute((always_inline))
-long long int __clock64() {
-return (long long int)  __builtin_readcyclecounter();
-}
-
-__device__
-inline __attribute((always_inline))
-long long int  __clock() { return __clock64(); }
-
-__device__
-inline  __attribute__((always_inline))
-long long int clock64() { return __clock64(); }
-
-__device__
-inline __attribute__((always_inline))
-long long int  clock() { return __clock(); }
-
-// hip.amdgcn.bc - named sync
-__device__
-inline
-void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); }
-
-#endif // __HIP_DEVICE_COMPILE__
-
-// warp vote function __all __any __ballot
-__device__
-inline
-int __all(int predicate) {
-    return __ockl_wfall_i32(predicate);
-}
-
-__device__
-inline
-int __any(int predicate) {
-    return __ockl_wfany_i32(predicate);
-}
-
-// XXX from llvm/include/llvm/IR/InstrTypes.h
-#define ICMP_NE 33
-
-__device__
-inline
-unsigned long long int __ballot(int predicate) {
-    return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
-}
-
-__device__
-inline
-unsigned long long int __ballot64(int predicate) {
-    return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
-}
-
-// hip.amdgcn.bc - lanemask
-__device__
-inline
-uint64_t  __lanemask_gt()
-{
-    uint32_t lane = __ockl_lane_u32();
-    if (lane == 63)
-      return 0;
-    uint64_t ballot = __ballot64(1);
-    uint64_t mask = (~((uint64_t)0)) << (lane + 1);
-    return mask & ballot;
-}
-
-__device__
-inline
-uint64_t __lanemask_lt()
-{
-    uint32_t lane = __ockl_lane_u32();
-    int64_t ballot = __ballot64(1);
-    uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1;
-    return mask & ballot;
-}
-
-__device__
-inline
-uint64_t  __lanemask_eq()
-{
-    uint32_t lane = __ockl_lane_u32();
-    int64_t mask = ((uint64_t)1 << lane);
-    return mask;
-}
-
-
-__device__ inline void* __local_to_generic(void* p) { return p; }
-
-#ifdef __HIP_DEVICE_COMPILE__
-__device__
-inline
-void* __get_dynamicgroupbaseptr()
-{
-    // Get group segment base pointer.
-    return (char*)__local_to_generic((void*)__to_local(__llvm_amdgcn_groupstaticsize()));
-}
-#else
-__device__
-void* __get_dynamicgroupbaseptr();
-#endif // __HIP_DEVICE_COMPILE__
-
-__device__
-inline
-void *__amdgcn_get_dynamicgroupbaseptr() {
-    return __get_dynamicgroupbaseptr();
-}
-
-#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
-// hip.amdgcn.bc - sync threads
-#define __CLK_LOCAL_MEM_FENCE    0x01
-typedef unsigned __cl_mem_fence_flags;
-
-typedef enum __memory_scope {
-  __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
-  __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
-  __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
-  __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
-  __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
-} __memory_scope;
-
-// enum values aligned with what clang uses in EmitAtomicExpr()
-typedef enum __memory_order
-{
-  __memory_order_relaxed = __ATOMIC_RELAXED,
-  __memory_order_acquire = __ATOMIC_ACQUIRE,
-  __memory_order_release = __ATOMIC_RELEASE,
-  __memory_order_acq_rel = __ATOMIC_ACQ_REL,
-  __memory_order_seq_cst = __ATOMIC_SEQ_CST
-} __memory_order;
-
-__device__
-inline
-static void
-__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
-{
-    // We're tying global-happens-before and local-happens-before together as does HSA
-    if (order != __memory_order_relaxed) {
-        switch (scope) {
-        case __memory_scope_work_item:
-            break;
-        case __memory_scope_sub_group:
-            switch (order) {
-            case __memory_order_relaxed: break;
-            case __memory_order_acquire: __llvm_fence_acq_sg(); break;
-            case __memory_order_release: __llvm_fence_rel_sg(); break;
-            case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
-            case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
-            }
-            break;
-        case __memory_scope_work_group:
-            switch (order) {
-            case __memory_order_relaxed: break;
-            case __memory_order_acquire: __llvm_fence_acq_wg(); break;
-            case __memory_order_release: __llvm_fence_rel_wg(); break;
-            case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
-            case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
-            }
-            break;
-        case __memory_scope_device:
-            switch (order) {
-            case __memory_order_relaxed: break;
-            case __memory_order_acquire: __llvm_fence_acq_dev(); break;
-            case __memory_order_release: __llvm_fence_rel_dev(); break;
-            case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
-            case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
-            }
-            break;
-        case __memory_scope_all_svm_devices:
-            switch (order) {
-            case __memory_order_relaxed: break;
-            case __memory_order_acquire: __llvm_fence_acq_sys(); break;
-            case __memory_order_release: __llvm_fence_rel_sys(); break;
-            case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
-            case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
-            }
-            break;
-        }
-    }
-}
-#endif
-
-// Memory Fence Functions
-__device__
-inline
-static void __threadfence()
-{
-  __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_device);
-}
-
-__device__
-inline
-static void __threadfence_block()
-{
-  __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_work_group);
-}
-
-__device__
-inline
-static void __threadfence_system()
-{
-  __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_all_svm_devices);
-}
-
-// abort
-__device__
-inline
-__attribute__((weak))
-void abort() {
-    return __builtin_trap();
-}
-
-
-#endif // __HCC_OR_HIP_CLANG__
-
-#ifdef __HCC__
-
-/**
- * extern __shared__
- */
-
-// Macro to replace extern __shared__ declarations
-// to local variable definitions
-#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr();
-
-#define HIP_DYNAMIC_SHARED_ATTRIBUTE
-
-
-#elif defined(__clang__) && defined(__HIP__)
-
-// The noinline attribute helps encapsulate the printf expansion,
-// which otherwise has a performance impact just by increasing the
-// size of the calling function. Additionally, the weak attribute
-// allows the function to exist as a global although its definition is
-// included in every compilation unit.
-#if defined(_WIN32) || defined(_WIN64)
-extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
-void _wassert(const wchar_t *_msg, const wchar_t *_file, unsigned _line) {
-    // FIXME: Need `wchar_t` support to generate assertion message.
-    __builtin_trap();
-}
-#else /* defined(_WIN32) || defined(_WIN64) */
-extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
-void __assert_fail(const char * __assertion,
-                   const char *__file,
-                   unsigned int __line,
-                   const char *__function)
-{
-    printf("%s:%u: %s: Device-side assertion `%s' failed.\n", __file, __line,
-           __function, __assertion);
-    __builtin_trap();
-}
-
-extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
-void __assertfail(const char * __assertion,
-                  const char *__file,
-                  unsigned int __line,
-                  const char *__function,
-                  size_t charsize)
-{
-    // ignore all the args for now.
-    __builtin_trap();
-}
-#endif /* defined(_WIN32) || defined(_WIN64) */
-
-__device__
-inline
-static void __work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope)
-{
-    if (flags) {
-        __atomic_work_item_fence(flags, __memory_order_release, scope);
-        __builtin_amdgcn_s_barrier();
-        __atomic_work_item_fence(flags, __memory_order_acquire, scope);
-    } else {
-        __builtin_amdgcn_s_barrier();
-    }
-}
-
-__device__
-inline
-static void __barrier(int n)
-{
-  __work_group_barrier((__cl_mem_fence_flags)n, __memory_scope_work_group);
-}
-
-__device__
-inline
-__attribute__((convergent))
-void __syncthreads()
-{
-  __barrier(__CLK_LOCAL_MEM_FENCE);
-}
-
-__device__
-inline
-__attribute__((convergent))
-int __syncthreads_count(int predicate)
-{
-  return __ockl_wgred_add_i32(!!predicate);
-}
-
-__device__
-inline
-__attribute__((convergent))
-int __syncthreads_and(int predicate)
-{
-  return __ockl_wgred_and_i32(!!predicate);
-}
-
-__device__
-inline
-__attribute__((convergent))
-int __syncthreads_or(int predicate)
-{
-  return __ockl_wgred_or_i32(!!predicate);
-}
-
-// hip.amdgcn.bc - device routine
-/*
-   HW_ID Register bit structure
-   WAVE_ID     3:0     Wave buffer slot number. 0-9.
-   SIMD_ID     5:4     SIMD which the wave is assigned to within the CU.
-   PIPE_ID     7:6     Pipeline from which the wave was dispatched.
-   CU_ID       11:8    Compute Unit the wave is assigned to.
-   SH_ID       12      Shader Array (within an SE) the wave is assigned to.
-   SE_ID       14:13   Shader Engine the wave is assigned to.
-   TG_ID       19:16   Thread-group ID
-   VM_ID       23:20   Virtual Memory ID
-   QUEUE_ID    26:24   Queue from which this wave was dispatched.
-   STATE_ID    29:27   State ID (graphics only, not compute).
-   ME_ID       31:30   Micro-engine ID.
- */
-
-#define HW_ID               4
-
-#define HW_ID_CU_ID_SIZE    4
-#define HW_ID_CU_ID_OFFSET  8
-
-#define HW_ID_SE_ID_SIZE    2
-#define HW_ID_SE_ID_OFFSET  13
-
-/*
-   Encoding of parameter bitmask
-   HW_ID        5:0     HW_ID
-   OFFSET       10:6    Range: 0..31
-   SIZE         15:11   Range: 1..32
- */
-
-#define GETREG_IMMED(SZ,OFF,REG) (((SZ) << 11) | ((OFF) << 6) | (REG))
-
-/*
-  __smid returns the wave's assigned Compute Unit and Shader Engine.
-  The Compute Unit, CU_ID returned in bits 3:0, and Shader Engine, SE_ID in bits 5:4.
-  Note: the results vary over time.
-  SZ minus 1 since SIZE is 1-based.
-*/
-__device__
-inline
-unsigned __smid(void)
-{
-    unsigned cu_id = __builtin_amdgcn_s_getreg(
-            GETREG_IMMED(HW_ID_CU_ID_SIZE-1, HW_ID_CU_ID_OFFSET, HW_ID));
-    unsigned se_id = __builtin_amdgcn_s_getreg(
-            GETREG_IMMED(HW_ID_SE_ID_SIZE-1, HW_ID_SE_ID_OFFSET, HW_ID));
-
-    /* Each shader engine has 16 CU */
-    return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
-}
-
-// Macro to replace extern __shared__ declarations
-// to local variable definitions
-#define HIP_DYNAMIC_SHARED(type, var) \
-    type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
-
-#define HIP_DYNAMIC_SHARED_ATTRIBUTE
-
-
-#endif //defined(__clang__) && defined(__HIP__)
-
-
-// loop unrolling
-static inline __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size) {
-    auto dstPtr = static_cast<unsigned char*>(dst);
-    auto srcPtr = static_cast<const unsigned char*>(src);
-
-    while (size >= 4u) {
-        dstPtr[0] = srcPtr[0];
-        dstPtr[1] = srcPtr[1];
-        dstPtr[2] = srcPtr[2];
-        dstPtr[3] = srcPtr[3];
-
-        size -= 4u;
-        srcPtr += 4u;
-        dstPtr += 4u;
-    }
-    switch (size) {
-        case 3:
-            dstPtr[2] = srcPtr[2];
-        case 2:
-            dstPtr[1] = srcPtr[1];
-        case 1:
-            dstPtr[0] = srcPtr[0];
-    }
-
-    return dst;
-}
-
-static inline __device__ void* __hip_hc_memset(void* dst, unsigned char val, size_t size) {
-    auto dstPtr = static_cast<unsigned char*>(dst);
-
-    while (size >= 4u) {
-        dstPtr[0] = val;
-        dstPtr[1] = val;
-        dstPtr[2] = val;
-        dstPtr[3] = val;
-
-        size -= 4u;
-        dstPtr += 4u;
-    }
-    switch (size) {
-        case 3:
-            dstPtr[2] = val;
-        case 2:
-            dstPtr[1] = val;
-        case 1:
-            dstPtr[0] = val;
-    }
-
-    return dst;
-}
-#ifndef __OPENMP_AMDGCN__
-static inline __device__ void* memcpy(void* dst, const void* src, size_t size) {
-    return __hip_hc_memcpy(dst, src, size);
-}
-
-static inline __device__ void* memset(void* ptr, int val, size_t size) {
-    unsigned char val8 = static_cast<unsigned char>(val);
-    return __hip_hc_memset(ptr, val8, size);
-}
-#endif // !__OPENMP_AMDGCN__
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/device_library_decls.h b/third_party/rocm/include/hip/hcc_detail/device_library_decls.h
deleted file mode 100644
index 90aef16..0000000
--- a/third_party/rocm/include/hip/hcc_detail/device_library_decls.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/device_library_decls.h
- *  @brief Contains declarations for types and functions in device library.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H
-
-#include "hip/hcc_detail/host_defines.h"
-
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-typedef unsigned int uint;
-typedef unsigned long ulong;
-typedef unsigned long long ullong;
-
-extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int);
-extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int);
-extern "C" __device__ uint __ockl_activelane_u32(void);
-
-extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint);
-extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int);
-extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint);
-extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int);
-extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint);
-
-extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar);
-extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort);
-extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint);
-extern "C" __device__ __attribute__((const)) ullong __ockl_clz_u64(ullong);
-
-extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float);
-extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float);
-extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float);
-extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float);
-
-extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float);
-extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float);
-
-extern "C" __device__ __attribute__((convergent)) void __ockl_gws_init(uint nwm1, uint rid);
-extern "C" __device__ __attribute__((convergent)) void __ockl_gws_barrier(uint nwm1, uint rid);
-
-extern "C" __device__ __attribute__((const)) uint32_t __ockl_lane_u32();
-extern "C" __device__ __attribute__((const)) int __ockl_grid_is_valid(void);
-extern "C" __device__ __attribute__((convergent)) void __ockl_grid_sync(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_num_grids(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_grid_rank(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_size(void);
-extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank(void);
-extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void);
-extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void);
-
-extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float);
-
-extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_add_i32(int a);
-extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_and_i32(int a);
-extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_or_i32(int a);
-
-
-// Introduce local address space
-#define __local __attribute__((address_space(3)))
-
-#ifdef __HIP_DEVICE_COMPILE__
-__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
-#endif //__HIP_DEVICE_COMPILE__
-
-#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
-// __llvm_fence* functions from device-libs/irif/src/fence.ll
-extern "C" __device__ void __llvm_fence_acq_sg(void);
-extern "C" __device__ void __llvm_fence_acq_wg(void);
-extern "C" __device__ void __llvm_fence_acq_dev(void);
-extern "C" __device__ void __llvm_fence_acq_sys(void);
-
-extern "C" __device__ void __llvm_fence_rel_sg(void);
-extern "C" __device__ void __llvm_fence_rel_wg(void);
-extern "C" __device__ void __llvm_fence_rel_dev(void);
-extern "C" __device__ void __llvm_fence_rel_sys(void);
-
-extern "C" __device__ void __llvm_fence_ar_sg(void);
-extern "C" __device__ void __llvm_fence_ar_wg(void);
-extern "C" __device__ void __llvm_fence_ar_dev(void);
-extern "C" __device__ void __llvm_fence_ar_sys(void);
-
-
-extern "C" __device__ void __llvm_fence_sc_sg(void);
-extern "C" __device__ void __llvm_fence_sc_wg(void);
-extern "C" __device__ void __llvm_fence_sc_dev(void);
-extern "C" __device__ void __llvm_fence_sc_sys(void);
-#else
-// Using hip.amdgcn.bc - sync threads
-#define __CLK_LOCAL_MEM_FENCE    0x01
-typedef unsigned __cl_mem_fence_flags;
-
-typedef enum __memory_scope {
-  __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
-  __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
-  __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
-  __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
-  __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
-} __memory_scope;
-
-// enum values aligned with what clang uses in EmitAtomicExpr()
-typedef enum __memory_order
-{
-  __memory_order_relaxed = __ATOMIC_RELAXED,
-  __memory_order_acquire = __ATOMIC_ACQUIRE,
-  __memory_order_release = __ATOMIC_RELEASE,
-  __memory_order_acq_rel = __ATOMIC_ACQ_REL,
-  __memory_order_seq_cst = __ATOMIC_SEQ_CST
-} __memory_order;
-
-// Linked from hip.amdgcn.bc
-extern "C" __device__ void
-__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope);
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/driver_types.h b/third_party/rocm/include/hip/hcc_detail/driver_types.h
deleted file mode 100644
index 7db78e5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/driver_types.h
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H
-
-#ifndef __cplusplus
-#include <stdbool.h>
-#endif
-
-typedef void* hipDeviceptr_t;
-typedef enum hipChannelFormatKind {
-    hipChannelFormatKindSigned = 0,
-    hipChannelFormatKindUnsigned = 1,
-    hipChannelFormatKindFloat = 2,
-    hipChannelFormatKindNone = 3
-}hipChannelFormatKind;
-
-typedef struct hipChannelFormatDesc {
-    int x;
-    int y;
-    int z;
-    int w;
-    enum hipChannelFormatKind f;
-}hipChannelFormatDesc;
-
-#define HIP_TRSA_OVERRIDE_FORMAT 0x01
-#define HIP_TRSF_READ_AS_INTEGER 0x01
-#define HIP_TRSF_NORMALIZED_COORDINATES 0x02
-#define HIP_TRSF_SRGB 0x10
-
-typedef enum hipArray_Format {
-    HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01,
-    HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
-    HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
-    HIP_AD_FORMAT_SIGNED_INT8 = 0x08,
-    HIP_AD_FORMAT_SIGNED_INT16 = 0x09,
-    HIP_AD_FORMAT_SIGNED_INT32 = 0x0a,
-    HIP_AD_FORMAT_HALF = 0x10,
-    HIP_AD_FORMAT_FLOAT = 0x20
-}hipArray_Format;
-
-typedef struct HIP_ARRAY_DESCRIPTOR {
-  size_t Width;
-  size_t Height;
-  enum hipArray_Format Format;
-  unsigned int NumChannels;
-}HIP_ARRAY_DESCRIPTOR;
-
-typedef struct HIP_ARRAY3D_DESCRIPTOR {
-  size_t Width;
-  size_t Height;
-  size_t Depth;
-  enum hipArray_Format Format;
-  unsigned int NumChannels;
-  unsigned int Flags;
-}HIP_ARRAY3D_DESCRIPTOR;
-
-typedef struct hipArray {
-    void* data;  // FIXME: generalize this
-    struct hipChannelFormatDesc desc;
-    unsigned int type;
-    unsigned int width;
-    unsigned int height;
-    unsigned int depth;
-    enum hipArray_Format Format;
-    unsigned int NumChannels;
-    bool isDrv;
-    unsigned int textureType;
-}hipArray;
-
-typedef struct hip_Memcpy2D {
-    size_t srcXInBytes;
-    size_t srcY;
-    hipMemoryType srcMemoryType;
-    const void* srcHost;
-    hipDeviceptr_t srcDevice;
-    hipArray* srcArray;
-    size_t srcPitch;
-    size_t dstXInBytes;
-    size_t dstY;
-    hipMemoryType dstMemoryType;
-    void* dstHost;
-    hipDeviceptr_t dstDevice;
-    hipArray* dstArray;
-    size_t dstPitch;
-    size_t WidthInBytes;
-    size_t Height;
-} hip_Memcpy2D;
-
-
-typedef struct hipArray* hipArray_t;
-typedef hipArray_t hiparray;
-typedef const struct hipArray* hipArray_const_t;
-
-// TODO: It needs to be modified since it was just copied from hipArray.
-struct hipMipmappedArray {
-    void* data;  // FIXME: generalize this
-    struct hipChannelFormatDesc desc;
-    unsigned int width;
-    unsigned int height;
-    unsigned int depth;
-};
-
-typedef struct hipMipmappedArray* hipMipmappedArray_t;
-
-typedef const struct hipMipmappedArray* hipMipmappedArray_const_t;
-
-/**
- * hip resource types
- */
-typedef enum hipResourceType {
-    hipResourceTypeArray = 0x00,
-    hipResourceTypeMipmappedArray = 0x01,
-    hipResourceTypeLinear = 0x02,
-    hipResourceTypePitch2D = 0x03
-}hipResourceType;
-
-typedef enum HIPresourcetype_enum {
-    HIP_RESOURCE_TYPE_ARRAY           = 0x00, /**< Array resoure */
-    HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */
-    HIP_RESOURCE_TYPE_LINEAR          = 0x02, /**< Linear resource */
-    HIP_RESOURCE_TYPE_PITCH2D         = 0x03  /**< Pitch 2D resource */
-} HIPresourcetype;
-
-/**
- * hip address modes
- */
-typedef enum HIPaddress_mode_enum {
-    HIP_TR_ADDRESS_MODE_WRAP   = 0,
-    HIP_TR_ADDRESS_MODE_CLAMP  = 1,
-    HIP_TR_ADDRESS_MODE_MIRROR = 2,
-    HIP_TR_ADDRESS_MODE_BORDER = 3
-} HIPaddress_mode;
-
-/**
- * hip filter modes
- */
-typedef enum HIPfilter_mode_enum {
-    HIP_TR_FILTER_MODE_POINT  = 0,
-    HIP_TR_FILTER_MODE_LINEAR = 1
-} HIPfilter_mode;
-
-/**
- * Texture descriptor
- */
-typedef struct HIP_TEXTURE_DESC_st {
-    HIPaddress_mode addressMode[3];  /**< Address modes */
-    HIPfilter_mode filterMode;       /**< Filter mode */
-    unsigned int flags;              /**< Flags */
-    unsigned int maxAnisotropy;      /**< Maximum anisotropy ratio */
-    HIPfilter_mode mipmapFilterMode; /**< Mipmap filter mode */
-    float mipmapLevelBias;           /**< Mipmap level bias */
-    float minMipmapLevelClamp;       /**< Mipmap minimum level clamp */
-    float maxMipmapLevelClamp;       /**< Mipmap maximum level clamp */
-    float borderColor[4];            /**< Border Color */
-    int reserved[12];
-} HIP_TEXTURE_DESC;
-
-/**
- * hip texture resource view formats
- */
-typedef enum hipResourceViewFormat {
-    hipResViewFormatNone = 0x00,
-    hipResViewFormatUnsignedChar1 = 0x01,
-    hipResViewFormatUnsignedChar2 = 0x02,
-    hipResViewFormatUnsignedChar4 = 0x03,
-    hipResViewFormatSignedChar1 = 0x04,
-    hipResViewFormatSignedChar2 = 0x05,
-    hipResViewFormatSignedChar4 = 0x06,
-    hipResViewFormatUnsignedShort1 = 0x07,
-    hipResViewFormatUnsignedShort2 = 0x08,
-    hipResViewFormatUnsignedShort4 = 0x09,
-    hipResViewFormatSignedShort1 = 0x0a,
-    hipResViewFormatSignedShort2 = 0x0b,
-    hipResViewFormatSignedShort4 = 0x0c,
-    hipResViewFormatUnsignedInt1 = 0x0d,
-    hipResViewFormatUnsignedInt2 = 0x0e,
-    hipResViewFormatUnsignedInt4 = 0x0f,
-    hipResViewFormatSignedInt1 = 0x10,
-    hipResViewFormatSignedInt2 = 0x11,
-    hipResViewFormatSignedInt4 = 0x12,
-    hipResViewFormatHalf1 = 0x13,
-    hipResViewFormatHalf2 = 0x14,
-    hipResViewFormatHalf4 = 0x15,
-    hipResViewFormatFloat1 = 0x16,
-    hipResViewFormatFloat2 = 0x17,
-    hipResViewFormatFloat4 = 0x18,
-    hipResViewFormatUnsignedBlockCompressed1 = 0x19,
-    hipResViewFormatUnsignedBlockCompressed2 = 0x1a,
-    hipResViewFormatUnsignedBlockCompressed3 = 0x1b,
-    hipResViewFormatUnsignedBlockCompressed4 = 0x1c,
-    hipResViewFormatSignedBlockCompressed4 = 0x1d,
-    hipResViewFormatUnsignedBlockCompressed5 = 0x1e,
-    hipResViewFormatSignedBlockCompressed5 = 0x1f,
-    hipResViewFormatUnsignedBlockCompressed6H = 0x20,
-    hipResViewFormatSignedBlockCompressed6H = 0x21,
-    hipResViewFormatUnsignedBlockCompressed7 = 0x22
-}hipResourceViewFormat;
-
-typedef enum HIPresourceViewFormat_enum
-{
-    HIP_RES_VIEW_FORMAT_NONE          = 0x00, /**< No resource view format (use underlying resource format) */
-    HIP_RES_VIEW_FORMAT_UINT_1X8      = 0x01, /**< 1 channel unsigned 8-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_2X8      = 0x02, /**< 2 channel unsigned 8-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_4X8      = 0x03, /**< 4 channel unsigned 8-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_1X8      = 0x04, /**< 1 channel signed 8-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_2X8      = 0x05, /**< 2 channel signed 8-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_4X8      = 0x06, /**< 4 channel signed 8-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_1X16     = 0x07, /**< 1 channel unsigned 16-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_2X16     = 0x08, /**< 2 channel unsigned 16-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_4X16     = 0x09, /**< 4 channel unsigned 16-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_1X16     = 0x0a, /**< 1 channel signed 16-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_2X16     = 0x0b, /**< 2 channel signed 16-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_4X16     = 0x0c, /**< 4 channel signed 16-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_1X32     = 0x0d, /**< 1 channel unsigned 32-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_2X32     = 0x0e, /**< 2 channel unsigned 32-bit integers */
-    HIP_RES_VIEW_FORMAT_UINT_4X32     = 0x0f, /**< 4 channel unsigned 32-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_1X32     = 0x10, /**< 1 channel signed 32-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_2X32     = 0x11, /**< 2 channel signed 32-bit integers */
-    HIP_RES_VIEW_FORMAT_SINT_4X32     = 0x12, /**< 4 channel signed 32-bit integers */
-    HIP_RES_VIEW_FORMAT_FLOAT_1X16    = 0x13, /**< 1 channel 16-bit floating point */
-    HIP_RES_VIEW_FORMAT_FLOAT_2X16    = 0x14, /**< 2 channel 16-bit floating point */
-    HIP_RES_VIEW_FORMAT_FLOAT_4X16    = 0x15, /**< 4 channel 16-bit floating point */
-    HIP_RES_VIEW_FORMAT_FLOAT_1X32    = 0x16, /**< 1 channel 32-bit floating point */
-    HIP_RES_VIEW_FORMAT_FLOAT_2X32    = 0x17, /**< 2 channel 32-bit floating point */
-    HIP_RES_VIEW_FORMAT_FLOAT_4X32    = 0x18, /**< 4 channel 32-bit floating point */
-    HIP_RES_VIEW_FORMAT_UNSIGNED_BC1  = 0x19, /**< Block compressed 1 */
-    HIP_RES_VIEW_FORMAT_UNSIGNED_BC2  = 0x1a, /**< Block compressed 2 */
-    HIP_RES_VIEW_FORMAT_UNSIGNED_BC3  = 0x1b, /**< Block compressed 3 */
-    HIP_RES_VIEW_FORMAT_UNSIGNED_BC4  = 0x1c, /**< Block compressed 4 unsigned */
-    HIP_RES_VIEW_FORMAT_SIGNED_BC4    = 0x1d, /**< Block compressed 4 signed */
-    HIP_RES_VIEW_FORMAT_UNSIGNED_BC5  = 0x1e, /**< Block compressed 5 unsigned */
-    HIP_RES_VIEW_FORMAT_SIGNED_BC5    = 0x1f, /**< Block compressed 5 signed */
-    HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */
-    HIP_RES_VIEW_FORMAT_SIGNED_BC6H   = 0x21, /**< Block compressed 6 signed half-float */
-    HIP_RES_VIEW_FORMAT_UNSIGNED_BC7  = 0x22  /**< Block compressed 7 */
-} HIPresourceViewFormat;
-
-/**
- * HIP resource descriptor
- */
-typedef struct hipResourceDesc {
-    enum hipResourceType resType;
-
-    union {
-        struct {
-            hipArray_t array;
-        } array;
-        struct {
-            hipMipmappedArray_t mipmap;
-        } mipmap;
-        struct {
-            void* devPtr;
-            struct hipChannelFormatDesc desc;
-            size_t sizeInBytes;
-        } linear;
-        struct {
-            void* devPtr;
-            struct hipChannelFormatDesc desc;
-            size_t width;
-            size_t height;
-            size_t pitchInBytes;
-        } pitch2D;
-    } res;
-}hipResourceDesc;
-
-typedef struct HIP_RESOURCE_DESC_st
-{
-    HIPresourcetype resType;                     /**< Resource type */
-
-    union {
-        struct {
-            hipArray_t hArray;                   /**< HIP array */
-        } array;
-        struct {
-            hipMipmappedArray_t hMipmappedArray; /**< HIP mipmapped array */
-        } mipmap;
-        struct {
-            hipDeviceptr_t devPtr;               /**< Device pointer */
-            hipArray_Format format;              /**< Array format */
-            unsigned int numChannels;            /**< Channels per array element */
-            size_t sizeInBytes;                  /**< Size in bytes */
-        } linear;
-        struct {
-            hipDeviceptr_t devPtr;               /**< Device pointer */
-            hipArray_Format format;              /**< Array format */
-            unsigned int numChannels;            /**< Channels per array element */
-            size_t width;                        /**< Width of the array in elements */
-            size_t height;                       /**< Height of the array in elements */
-            size_t pitchInBytes;                 /**< Pitch between two rows in bytes */
-        } pitch2D;
-        struct {
-            int reserved[32];
-        } reserved;
-    } res;
-
-    unsigned int flags;                          /**< Flags (must be zero) */
-} HIP_RESOURCE_DESC;
-
-/**
- * hip resource view descriptor
- */
-struct hipResourceViewDesc {
-    enum hipResourceViewFormat format;
-    size_t width;
-    size_t height;
-    size_t depth;
-    unsigned int firstMipmapLevel;
-    unsigned int lastMipmapLevel;
-    unsigned int firstLayer;
-    unsigned int lastLayer;
-};
-
-/**
- * Resource view descriptor
- */
-typedef struct HIP_RESOURCE_VIEW_DESC_st
-{
-    HIPresourceViewFormat format;   /**< Resource view format */
-    size_t width;                   /**< Width of the resource view */
-    size_t height;                  /**< Height of the resource view */
-    size_t depth;                   /**< Depth of the resource view */
-    unsigned int firstMipmapLevel;  /**< First defined mipmap level */
-    unsigned int lastMipmapLevel;   /**< Last defined mipmap level */
-    unsigned int firstLayer;        /**< First layer index */
-    unsigned int lastLayer;         /**< Last layer index */
-    unsigned int reserved[16];
-} HIP_RESOURCE_VIEW_DESC;
-
-/**
- * Memory copy types
- *
- */
-typedef enum hipMemcpyKind {
-    hipMemcpyHostToHost = 0,      ///< Host-to-Host Copy
-    hipMemcpyHostToDevice = 1,    ///< Host-to-Device Copy
-    hipMemcpyDeviceToHost = 2,    ///< Device-to-Host Copy
-    hipMemcpyDeviceToDevice = 3,  ///< Device-to-Device Copy
-    hipMemcpyDefault =
-        4  ///< Runtime will automatically determine copy-kind based on virtual addresses.
-} hipMemcpyKind;
-
-typedef struct hipPitchedPtr {
-    void* ptr;
-    size_t pitch;
-    size_t xsize;
-    size_t ysize;
-}hipPitchedPtr;
-
-typedef struct hipExtent {
-    size_t width;  // Width in elements when referring to array memory, in bytes when referring to
-                   // linear memory
-    size_t height;
-    size_t depth;
-}hipExtent;
-
-typedef struct hipPos {
-    size_t x;
-    size_t y;
-    size_t z;
-}hipPos;
-
-typedef struct hipMemcpy3DParms {
-    hipArray_t srcArray;
-    struct hipPos srcPos;
-    struct hipPitchedPtr srcPtr;
-    hipArray_t dstArray;
-    struct hipPos dstPos;
-    struct hipPitchedPtr dstPtr;
-    struct hipExtent extent;
-    enum hipMemcpyKind kind;
-} hipMemcpy3DParms;
-
-typedef struct HIP_MEMCPY3D {
-  unsigned int srcXInBytes;
-  unsigned int srcY;
-  unsigned int srcZ;
-  unsigned int srcLOD;
-  hipMemoryType srcMemoryType;
-  const void* srcHost;
-  hipDeviceptr_t srcDevice;
-  hipArray_t srcArray;
-  unsigned int srcPitch;
-  unsigned int srcHeight;
-  unsigned int dstXInBytes;
-  unsigned int dstY;
-  unsigned int dstZ;
-  unsigned int dstLOD;
-  hipMemoryType dstMemoryType;
-  void* dstHost;
-  hipDeviceptr_t dstDevice;
-  hipArray_t dstArray;
-  unsigned int dstPitch;
-  unsigned int dstHeight;
-  unsigned int WidthInBytes;
-  unsigned int Height;
-  unsigned int Depth;
-} HIP_MEMCPY3D;
-
-static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz,
-                                                          size_t ysz) {
-    struct hipPitchedPtr s;
-
-    s.ptr = d;
-    s.pitch = p;
-    s.xsize = xsz;
-    s.ysize = ysz;
-
-    return s;
-}
-
-static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) {
-    struct hipPos p;
-
-    p.x = x;
-    p.y = y;
-    p.z = z;
-
-    return p;
-}
-
-static inline struct hipExtent make_hipExtent(size_t w, size_t h, size_t d) {
-    struct hipExtent e;
-
-    e.width = w;
-    e.height = h;
-    e.depth = d;
-
-    return e;
-}
-
-typedef enum hipFunction_attribute {
-    HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
-    HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
-    HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES,
-    HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
-    HIP_FUNC_ATTRIBUTE_NUM_REGS,
-    HIP_FUNC_ATTRIBUTE_PTX_VERSION,
-    HIP_FUNC_ATTRIBUTE_BINARY_VERSION,
-    HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA,
-    HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
-    HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT,
-    HIP_FUNC_ATTRIBUTE_MAX
-}hipFunction_attribute;
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elf_types.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elf_types.hpp
deleted file mode 100644
index a17b700..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elf_types.hpp
+++ /dev/null
@@ -1,748 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFTYPES_H
-#define ELFTYPES_H
-
-#ifndef ELFIO_NO_OWN_TYPES
-#if !defined(ELFIO_NO_CSTDINT) && !defined(ELFIO_NO_INTTYPES)
-#include <stdint.h>
-#else
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef unsigned short uint16_t;
-typedef signed short int16_t;
-#ifdef _MSC_VER
-typedef unsigned __int32 uint32_t;
-typedef signed __int32 int32_t;
-typedef unsigned __int64 uint64_t;
-typedef signed __int64 int64_t;
-#else
-typedef unsigned int uint32_t;
-typedef signed int int32_t;
-typedef unsigned long long uint64_t;
-typedef signed long long int64_t;
-#endif  // _MSC_VER
-#endif  // ELFIO_NO_CSTDINT
-#endif  // ELFIO_NO_OWN_TYPES
-
-namespace ELFIO {
-
-// Attention! Platform depended definitions.
-typedef uint16_t Elf_Half;
-typedef uint32_t Elf_Word;
-typedef int32_t Elf_Sword;
-typedef uint64_t Elf_Xword;
-typedef int64_t Elf_Sxword;
-
-typedef uint32_t Elf32_Addr;
-typedef uint32_t Elf32_Off;
-typedef uint64_t Elf64_Addr;
-typedef uint64_t Elf64_Off;
-
-#define Elf32_Half Elf_Half
-#define Elf64_Half Elf_Half
-#define Elf32_Word Elf_Word
-#define Elf64_Word Elf_Word
-#define Elf32_Sword Elf_Sword
-#define Elf64_Sword Elf_Sword
-
-///////////////////////
-// ELF Header Constants
-
-// File type
-#define ET_NONE 0
-#define ET_REL 1
-#define ET_EXEC 2
-#define ET_DYN 3
-#define ET_CORE 4
-#define ET_LOOS 0xFE00
-#define ET_HIOS 0xFEFF
-#define ET_LOPROC 0xFF00
-#define ET_HIPROC 0xFFFF
-
-
-#define EM_NONE 0          // No machine
-#define EM_M32 1           // AT&T WE 32100
-#define EM_SPARC 2         // SUN SPARC
-#define EM_386 3           // Intel 80386
-#define EM_68K 4           // Motorola m68k family
-#define EM_88K 5           // Motorola m88k family
-#define EM_486 6           // Intel 80486// Reserved for future use
-#define EM_860 7           // Intel 80860
-#define EM_MIPS 8          // MIPS R3000 (officially, big-endian only)
-#define EM_S370 9          // IBM System/370
-#define EM_MIPS_RS3_LE 10  // MIPS R3000 little-endian (Oct 4 1999 Draft) Deprecated
-#define EM_res011 11       // Reserved
-#define EM_res012 12       // Reserved
-#define EM_res013 13       // Reserved
-#define EM_res014 14       // Reserved
-#define EM_PARISC 15       // HPPA
-#define EM_res016 16       // Reserved
-#define EM_VPP550 17       // Fujitsu VPP500
-#define EM_SPARC32PLUS 18  // Sun's "v8plus"
-#define EM_960 19          // Intel 80960
-#define EM_PPC 20          // PowerPC
-#define EM_PPC64 21        // 64-bit PowerPC
-#define EM_S390 22         // IBM S/390
-#define EM_SPU 23          // Sony/Toshiba/IBM SPU
-#define EM_res024 24       // Reserved
-#define EM_res025 25       // Reserved
-#define EM_res026 26       // Reserved
-#define EM_res027 27       // Reserved
-#define EM_res028 28       // Reserved
-#define EM_res029 29       // Reserved
-#define EM_res030 30       // Reserved
-#define EM_res031 31       // Reserved
-#define EM_res032 32       // Reserved
-#define EM_res033 33       // Reserved
-#define EM_res034 34       // Reserved
-#define EM_res035 35       // Reserved
-#define EM_V800 36         // NEC V800 series
-#define EM_FR20 37         // Fujitsu FR20
-#define EM_RH32 38         // TRW RH32
-#define EM_MCORE 39        // Motorola M*Core // May also be taken by Fujitsu MMA
-#define EM_RCE 39          // Old name for MCore
-#define EM_ARM 40          // ARM
-#define EM_OLD_ALPHA 41    // Digital Alpha
-#define EM_SH 42           // Renesas (formerly Hitachi) / SuperH SH
-#define EM_SPARCV9 43      // SPARC v9 64-bit
-#define EM_TRICORE 44      // Siemens Tricore embedded processor
-#define EM_ARC 45          // ARC Cores
-#define EM_H8_300 46       // Renesas (formerly Hitachi) H8/300
-#define EM_H8_300H 47      // Renesas (formerly Hitachi) H8/300H
-#define EM_H8S 48          // Renesas (formerly Hitachi) H8S
-#define EM_H8_500 49       // Renesas (formerly Hitachi) H8/500
-#define EM_IA_64 50        // Intel IA-64 Processor
-#define EM_MIPS_X 51       // Stanford MIPS-X
-#define EM_COLDFIRE 52     // Motorola Coldfire
-#define EM_68HC12 53       // Motorola M68HC12
-#define EM_MMA 54          // Fujitsu Multimedia Accelerator
-#define EM_PCP 55          // Siemens PCP
-#define EM_NCPU 56         // Sony nCPU embedded RISC processor
-#define EM_NDR1 57         // Denso NDR1 microprocesspr
-#define EM_STARCORE 58     // Motorola Star*Core processor
-#define EM_ME16 59         // Toyota ME16 processor
-#define EM_ST100 60        // STMicroelectronics ST100 processor
-#define EM_TINYJ 61        // Advanced Logic Corp. TinyJ embedded processor
-#define EM_X86_64 62       // Advanced Micro Devices X86-64 processor
-#define EM_PDSP 63         // Sony DSP Processor
-#define EM_PDP10 64        // Digital Equipment Corp. PDP-10
-#define EM_PDP11 65        // Digital Equipment Corp. PDP-11
-#define EM_FX66 66         // Siemens FX66 microcontroller
-#define EM_ST9PLUS 67      // STMicroelectronics ST9+ 8/16 bit microcontroller
-#define EM_ST7 68          // STMicroelectronics ST7 8-bit microcontroller
-#define EM_68HC16 69       // Motorola MC68HC16 Microcontroller
-#define EM_68HC11 70       // Motorola MC68HC11 Microcontroller
-#define EM_68HC08 71       // Motorola MC68HC08 Microcontroller
-#define EM_68HC05 72       // Motorola MC68HC05 Microcontroller
-#define EM_SVX 73          // Silicon Graphics SVx
-#define EM_ST19 74         // STMicroelectronics ST19 8-bit cpu
-#define EM_VAX 75          // Digital VAX
-#define EM_CRIS 76         // Axis Communications 32-bit embedded processor
-#define EM_JAVELIN 77      // Infineon Technologies 32-bit embedded cpu
-#define EM_FIREPATH 78     // Element 14 64-bit DSP processor
-#define EM_ZSP 79          // LSI Logic's 16-bit DSP processor
-#define EM_MMIX 80         // Donald Knuth's educational 64-bit processor
-#define EM_HUANY 81        // Harvard's machine-independent format
-#define EM_PRISM 82        // SiTera Prism
-#define EM_AVR 83          // Atmel AVR 8-bit microcontroller
-#define EM_FR30 84         // Fujitsu FR30
-#define EM_D10V 85         // Mitsubishi D10V
-#define EM_D30V 86         // Mitsubishi D30V
-#define EM_V850 87         // NEC v850
-#define EM_M32R 88         // Renesas M32R (formerly Mitsubishi M32R)
-#define EM_MN10300 89      // Matsushita MN10300
-#define EM_MN10200 90      // Matsushita MN10200
-#define EM_PJ 91           // picoJava
-#define EM_OPENRISC 92     // OpenRISC 32-bit embedded processor
-#define EM_ARC_A5 93       // ARC Cores Tangent-A5
-#define EM_XTENSA 94       // Tensilica Xtensa Architecture
-#define EM_VIDEOCORE 95    // Alphamosaic VideoCore processor
-#define EM_TMM_GPP 96      // Thompson Multimedia General Purpose Processor
-#define EM_NS32K 97        // National Semiconductor 32000 series
-#define EM_TPC 98          // Tenor Network TPC processor
-#define EM_SNP1K 99        // Trebia SNP 1000 processor
-#define EM_ST200 100       // STMicroelectronics ST200 microcontroller
-#define EM_IP2K 101        // Ubicom IP2022 micro controller
-#define EM_MAX 102         // MAX Processor
-#define EM_CR 103          // National Semiconductor CompactRISC
-#define EM_F2MC16 104      // Fujitsu F2MC16
-#define EM_MSP430 105      // TI msp430 micro controller
-#define EM_BLACKFIN 106    // ADI Blackfin
-#define EM_SE_C33 107      // S1C33 Family of Seiko Epson processors
-#define EM_SEP 108         // Sharp embedded microprocessor
-#define EM_ARCA 109        // Arca RISC Microprocessor
-#define EM_UNICORE 110  // Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University
-#define EM_EXCESS 111   // eXcess: 16/32/64-bit configurable embedded CPU
-#define EM_DXP 112      // Icera Semiconductor Inc. Deep Execution Processor
-#define EM_ALTERA_NIOS2 113   // Altera Nios II soft-core processor
-#define EM_CRX 114            // National Semiconductor CRX
-#define EM_XGATE 115          // Motorola XGATE embedded processor
-#define EM_C166 116           // Infineon C16x/XC16x processor
-#define EM_M16C 117           // Renesas M16C series microprocessors
-#define EM_DSPIC30F 118       // Microchip Technology dsPIC30F Digital Signal Controller
-#define EM_CE 119             // Freescale Communication Engine RISC core
-#define EM_M32C 120           // Renesas M32C series microprocessors
-#define EM_res121 121         // Reserved
-#define EM_res122 122         // Reserved
-#define EM_res123 123         // Reserved
-#define EM_res124 124         // Reserved
-#define EM_res125 125         // Reserved
-#define EM_res126 126         // Reserved
-#define EM_res127 127         // Reserved
-#define EM_res128 128         // Reserved
-#define EM_res129 129         // Reserved
-#define EM_res130 130         // Reserved
-#define EM_TSK3000 131        // Altium TSK3000 core
-#define EM_RS08 132           // Freescale RS08 embedded processor
-#define EM_res133 133         // Reserved
-#define EM_ECOG2 134          // Cyan Technology eCOG2 microprocessor
-#define EM_SCORE 135          // Sunplus Score
-#define EM_SCORE7 135         // Sunplus S+core7 RISC processor
-#define EM_DSP24 136          // New Japan Radio (NJR) 24-bit DSP Processor
-#define EM_VIDEOCORE3 137     // Broadcom VideoCore III processor
-#define EM_LATTICEMICO32 138  // RISC processor for Lattice FPGA architecture
-#define EM_SE_C17 139         // Seiko Epson C17 family
-#define EM_TI_C6000 140       // Texas Instruments TMS320C6000 DSP family
-#define EM_TI_C2000 141       // Texas Instruments TMS320C2000 DSP family
-#define EM_TI_C5500 142       // Texas Instruments TMS320C55x DSP family
-#define EM_res143 143         // Reserved
-#define EM_res144 144         // Reserved
-#define EM_res145 145         // Reserved
-#define EM_res146 146         // Reserved
-#define EM_res147 147         // Reserved
-#define EM_res148 148         // Reserved
-#define EM_res149 149         // Reserved
-#define EM_res150 150         // Reserved
-#define EM_res151 151         // Reserved
-#define EM_res152 152         // Reserved
-#define EM_res153 153         // Reserved
-#define EM_res154 154         // Reserved
-#define EM_res155 155         // Reserved
-#define EM_res156 156         // Reserved
-#define EM_res157 157         // Reserved
-#define EM_res158 158         // Reserved
-#define EM_res159 159         // Reserved
-#define EM_MMDSP_PLUS 160     // STMicroelectronics 64bit VLIW Data Signal Processor
-#define EM_CYPRESS_M8C 161    // Cypress M8C microprocessor
-#define EM_R32C 162           // Renesas R32C series microprocessors
-#define EM_TRIMEDIA 163       // NXP Semiconductors TriMedia architecture family
-#define EM_QDSP6 164          // QUALCOMM DSP6 Processor
-#define EM_8051 165           // Intel 8051 and variants
-#define EM_STXP7X 166         // STMicroelectronics STxP7x family
-#define EM_NDS32 167          // Andes Technology compact code size embedded RISC processor family
-#define EM_ECOG1 168          // Cyan Technology eCOG1X family
-#define EM_ECOG1X 168         // Cyan Technology eCOG1X family
-#define EM_MAXQ30 169         // Dallas Semiconductor MAXQ30 Core Micro-controllers
-#define EM_XIMO16 170         // New Japan Radio (NJR) 16-bit DSP Processor
-#define EM_MANIK 171          // M2000 Reconfigurable RISC Microprocessor
-#define EM_CRAYNV2 172        // Cray Inc. NV2 vector architecture
-#define EM_RX 173             // Renesas RX family
-#define EM_METAG 174          // Imagination Technologies META processor architecture
-#define EM_MCST_ELBRUS 175    // MCST Elbrus general purpose hardware architecture
-#define EM_ECOG16 176         // Cyan Technology eCOG16 family
-#define EM_CR16 177           // National Semiconductor CompactRISC 16-bit processor
-#define EM_ETPU 178           // Freescale Extended Time Processing Unit
-#define EM_SLE9X 179          // Infineon Technologies SLE9X core
-#define EM_L1OM 180           // Intel L1OM
-#define EM_INTEL181 181       // Reserved by Intel
-#define EM_INTEL182 182       // Reserved by Intel
-#define EM_res183 183         // Reserved by ARM
-#define EM_res184 184         // Reserved by ARM
-#define EM_AVR32 185          // Atmel Corporation 32-bit microprocessor family
-#define EM_STM8 186           // STMicroeletronics STM8 8-bit microcontroller
-#define EM_TILE64 187         // Tilera TILE64 multicore architecture family
-#define EM_TILEPRO 188        // Tilera TILEPro multicore architecture family
-#define EM_MICROBLAZE 189     // Xilinx MicroBlaze 32-bit RISC soft processor core
-#define EM_CUDA 190           // NVIDIA CUDA architecture
-#define EM_TILEGX 191         // Tilera TILE-Gx multicore architecture family
-#define EM_CLOUDSHIELD 192    // CloudShield architecture family
-#define EM_COREA_1ST 193      // KIPO-KAIST Core-A 1st generation processor family
-#define EM_COREA_2ND 194      // KIPO-KAIST Core-A 2nd generation processor family
-#define EM_ARC_COMPACT2 195   // Synopsys ARCompact V2
-#define EM_OPEN8 196          // Open8 8-bit RISC soft processor core
-#define EM_RL78 197           // Renesas RL78 family
-#define EM_VIDEOCORE5 198     // Broadcom VideoCore V processor
-#define EM_78KOR 199          // Renesas 78KOR family
-#define EM_56800EX 200        // Freescale 56800EX Digital Signal Controller (DSC)
-#define EM_BA1 201            // Beyond BA1 CPU architecture
-#define EM_BA2 202            // Beyond BA2 CPU architecture
-#define EM_XCORE 203          // XMOS xCORE processor family
-#define EM_MCHP_PIC 204       // Microchip 8-bit PIC(r) family
-#define EM_INTEL205 205       // Reserved by Intel
-#define EM_INTEL206 206       // Reserved by Intel
-#define EM_INTEL207 207       // Reserved by Intel
-#define EM_INTEL208 208       // Reserved by Intel
-#define EM_INTEL209 209       // Reserved by Intel
-#define EM_KM32 210           // KM211 KM32 32-bit processor
-#define EM_KMX32 211          // KM211 KMX32 32-bit processor
-#define EM_KMX16 212          // KM211 KMX16 16-bit processor
-#define EM_KMX8 213           // KM211 KMX8 8-bit processor
-#define EM_KVARC 214          // KM211 KVARC processor
-#define EM_CDP 215            // Paneve CDP architecture family
-#define EM_COGE 216           // Cognitive Smart Memory Processor
-#define EM_COOL 217           // iCelero CoolEngine
-#define EM_NORC 218           // Nanoradio Optimized RISC
-#define EM_CSR_KALIMBA 219    // CSR Kalimba architecture family
-#define EM_Z80 220            // Zilog Z80
-#define EM_VISIUM 221         // Controls and Data Services VISIUMcore processor
-#define EM_FT32 222           // FTDI Chip FT32 high performance 32-bit RISC architecture
-#define EM_MOXIE 223          // Moxie processor family
-#define EM_AMDGPU 224         // AMD GPU architecture
-#define EM_RISCV 243          // RISC-V
-#define EM_LANAI 244          // Lanai processor
-#define EM_CEVA 245           // CEVA Processor Architecture Family
-#define EM_CEVA_X2 246        // CEVA X2 Processor Family
-#define EM_BPF 247            // Linux BPF – in-kernel virtual machine
-
-// File version
-#define EV_NONE 0
-#define EV_CURRENT 1
-
-// Identification index
-#define EI_MAG0 0
-#define EI_MAG1 1
-#define EI_MAG2 2
-#define EI_MAG3 3
-#define EI_CLASS 4
-#define EI_DATA 5
-#define EI_VERSION 6
-#define EI_OSABI 7
-#define EI_ABIVERSION 8
-#define EI_PAD 9
-#define EI_NIDENT 16
-
-// Magic number
-#define ELFMAG0 0x7F
-#define ELFMAG1 'E'
-#define ELFMAG2 'L'
-#define ELFMAG3 'F'
-
-// File class
-#define ELFCLASSNONE 0
-#define ELFCLASS32 1
-#define ELFCLASS64 2
-
-// Encoding
-#define ELFDATANONE 0
-#define ELFDATA2LSB 1
-#define ELFDATA2MSB 2
-
-// OS extensions
-#define ELFOSABI_NONE 0      // No extensions or unspecified
-#define ELFOSABI_HPUX 1      // Hewlett-Packard HP-UX
-#define ELFOSABI_NETBSD 2    // NetBSD
-#define ELFOSABI_LINUX 3     // Linux
-#define ELFOSABI_SOLARIS 6   // Sun Solaris
-#define ELFOSABI_AIX 7       // AIX
-#define ELFOSABI_IRIX 8      // IRIX
-#define ELFOSABI_FREEBSD 9   // FreeBSD
-#define ELFOSABI_TRU64 10    // Compaq TRU64 UNIX
-#define ELFOSABI_MODESTO 11  // Novell Modesto
-#define ELFOSABI_OPENBSD 12  // Open BSD
-#define ELFOSABI_OPENVMS 13  // Open VMS
-#define ELFOSABI_NSK 14      // Hewlett-Packard Non-Stop Kernel
-#define ELFOSABI_AROS 15     // Amiga Research OS
-#define ELFOSABI_FENIXOS 16  // The FenixOS highly scalable multi-core OS
-//                       64-255 Architecture-specific value range
-
-
-/////////////////////
-// Sections constants
-
-// Section indexes
-#define SHN_UNDEF 0
-#define SHN_LORESERVE 0xFF00
-#define SHN_LOPROC 0xFF00
-#define SHN_HIPROC 0xFF1F
-#define SHN_LOOS 0xFF20
-#define SHN_HIOS 0xFF3F
-#define SHN_ABS 0xFFF1
-#define SHN_COMMON 0xFFF2
-#define SHN_XINDEX 0xFFFF
-#define SHN_HIRESERVE 0xFFFF
-
-// Section types
-#define SHT_NULL 0
-#define SHT_PROGBITS 1
-#define SHT_SYMTAB 2
-#define SHT_STRTAB 3
-#define SHT_RELA 4
-#define SHT_HASH 5
-#define SHT_DYNAMIC 6
-#define SHT_NOTE 7
-#define SHT_NOBITS 8
-#define SHT_REL 9
-#define SHT_SHLIB 10
-#define SHT_DYNSYM 11
-#define SHT_INIT_ARRAY 14
-#define SHT_FINI_ARRAY 15
-#define SHT_PREINIT_ARRAY 16
-#define SHT_GROUP 17
-#define SHT_SYMTAB_SHNDX 18
-#define SHT_LOOS 0x60000000
-#define SHT_HIOS 0x6fffffff
-#define SHT_LOPROC 0x70000000
-#define SHT_HIPROC 0x7FFFFFFF
-#define SHT_LOUSER 0x80000000
-#define SHT_HIUSER 0xFFFFFFFF
-
-// Section attribute flags
-#define SHF_WRITE 0x1
-#define SHF_ALLOC 0x2
-#define SHF_EXECINSTR 0x4
-#define SHF_MERGE 0x10
-#define SHF_STRINGS 0x20
-#define SHF_INFO_LINK 0x40
-#define SHF_LINK_ORDER 0x80
-#define SHF_OS_NONCONFORMING 0x100
-#define SHF_GROUP 0x200
-#define SHF_TLS 0x400
-#define SHF_MASKOS 0x0ff00000
-#define SHF_MASKPROC 0xF0000000
-
-// Section group flags
-#define GRP_COMDAT 0x1
-#define GRP_MASKOS 0x0ff00000
-#define GRP_MASKPROC 0xf0000000
-
-// Symbol binding
-#define STB_LOCAL 0
-#define STB_GLOBAL 1
-#define STB_WEAK 2
-#define STB_LOOS 10
-#define STB_HIOS 12
-#define STB_MULTIDEF 13
-#define STB_LOPROC 13
-#define STB_HIPROC 15
-
-// Symbol types
-#define STT_NOTYPE 0
-#define STT_OBJECT 1
-#define STT_FUNC 2
-#define STT_SECTION 3
-#define STT_FILE 4
-#define STT_COMMON 5
-#define STT_TLS 6
-#define STT_LOOS 10
-#define STT_HIOS 12
-#define STT_LOPROC 13
-#define STT_HIPROC 15
-
-// Symbol visibility
-#define STV_DEFAULT 0
-#define STV_INTERNAL 1
-#define STV_HIDDEN 2
-#define STV_PROTECTED 3
-
-// Undefined name
-#define STN_UNDEF 0
-
-// Relocation types
-#define R_386_NONE 0
-#define R_X86_64_NONE 0
-#define R_386_32 1
-#define R_X86_64_64 1
-#define R_386_PC32 2
-#define R_X86_64_PC32 2
-#define R_386_GOT32 3
-#define R_X86_64_GOT32 3
-#define R_386_PLT32 4
-#define R_X86_64_PLT32 4
-#define R_386_COPY 5
-#define R_X86_64_COPY 5
-#define R_386_GLOB_DAT 6
-#define R_X86_64_GLOB_DAT 6
-#define R_386_JMP_SLOT 7
-#define R_X86_64_JUMP_SLOT 7
-#define R_386_RELATIVE 8
-#define R_X86_64_RELATIVE 8
-#define R_386_GOTOFF 9
-#define R_X86_64_GOTPCREL 9
-#define R_386_GOTPC 10
-#define R_X86_64_32 10
-#define R_X86_64_32S 11
-#define R_X86_64_16 12
-#define R_X86_64_PC16 13
-#define R_X86_64_8 14
-#define R_X86_64_PC8 15
-#define R_X86_64_DTPMOD64 16
-#define R_X86_64_DTPOFF64 17
-#define R_X86_64_TPOFF64 18
-#define R_X86_64_TLSGD 19
-#define R_X86_64_TLSLD 20
-#define R_X86_64_DTPOFF32 21
-#define R_X86_64_GOTTPOFF 22
-#define R_X86_64_TPOFF32 23
-#define R_X86_64_PC64 24
-#define R_X86_64_GOTOFF64 25
-#define R_X86_64_GOTPC32 26
-#define R_X86_64_GOT64 27
-#define R_X86_64_GOTPCREL64 28
-#define R_X86_64_GOTPC64 29
-#define R_X86_64_GOTPLT64 30
-#define R_X86_64_PLTOFF64 31
-#define R_X86_64_GOTPC32_TLSDESC 34
-#define R_X86_64_TLSDESC_CALL 35
-#define R_X86_64_TLSDESC 36
-#define R_X86_64_IRELATIVE 37
-#define R_X86_64_GNU_VTINHERIT 250
-#define R_X86_64_GNU_VTENTRY 251
-
-// Segment types
-#define PT_NULL 0
-#define PT_LOAD 1
-#define PT_DYNAMIC 2
-#define PT_INTERP 3
-#define PT_NOTE 4
-#define PT_SHLIB 5
-#define PT_PHDR 6
-#define PT_TLS 7
-#define PT_LOOS 0x60000000
-#define PT_HIOS 0x6fffffff
-#define PT_LOPROC 0x70000000
-#define PT_HIPROC 0x7FFFFFFF
-
-// Segment flags
-#define PF_X 1                  // Execute
-#define PF_W 2                  // Write
-#define PF_R 4                  // Read
-#define PF_MASKOS 0x0ff00000    // Unspecified
-#define PF_MASKPROC 0xf0000000  // Unspecified
-
-// Dynamic Array Tags
-#define DT_NULL 0
-#define DT_NEEDED 1
-#define DT_PLTRELSZ 2
-#define DT_PLTGOT 3
-#define DT_HASH 4
-#define DT_STRTAB 5
-#define DT_SYMTAB 6
-#define DT_RELA 7
-#define DT_RELASZ 8
-#define DT_RELAENT 9
-#define DT_STRSZ 10
-#define DT_SYMENT 11
-#define DT_INIT 12
-#define DT_FINI 13
-#define DT_SONAME 14
-#define DT_RPATH 15
-#define DT_SYMBOLIC 16
-#define DT_REL 17
-#define DT_RELSZ 18
-#define DT_RELENT 19
-#define DT_PLTREL 20
-#define DT_DEBUG 21
-#define DT_TEXTREL 22
-#define DT_JMPREL 23
-#define DT_BIND_NOW 24
-#define DT_INIT_ARRAY 25
-#define DT_FINI_ARRAY 26
-#define DT_INIT_ARRAYSZ 27
-#define DT_FINI_ARRAYSZ 28
-#define DT_RUNPATH 29
-#define DT_FLAGS 30
-#define DT_ENCODING 32
-#define DT_PREINIT_ARRAY 32
-#define DT_PREINIT_ARRAYSZ 33
-#define DT_MAXPOSTAGS 34
-#define DT_LOOS 0x6000000D
-#define DT_HIOS 0x6ffff000
-#define DT_LOPROC 0x70000000
-#define DT_HIPROC 0x7FFFFFFF
-
-// DT_FLAGS values
-#define DF_ORIGIN 0x1
-#define DF_SYMBOLIC 0x2
-#define DF_TEXTREL 0x4
-#define DF_BIND_NOW 0x8
-#define DF_STATIC_TLS 0x10
-
-
-// ELF file header
-struct Elf32_Ehdr {
-    unsigned char e_ident[EI_NIDENT];
-    Elf_Half e_type;
-    Elf_Half e_machine;
-    Elf_Word e_version;
-    Elf32_Addr e_entry;
-    Elf32_Off e_phoff;
-    Elf32_Off e_shoff;
-    Elf_Word e_flags;
-    Elf_Half e_ehsize;
-    Elf_Half e_phentsize;
-    Elf_Half e_phnum;
-    Elf_Half e_shentsize;
-    Elf_Half e_shnum;
-    Elf_Half e_shstrndx;
-};
-
-struct Elf64_Ehdr {
-    unsigned char e_ident[EI_NIDENT];
-    Elf_Half e_type;
-    Elf_Half e_machine;
-    Elf_Word e_version;
-    Elf64_Addr e_entry;
-    Elf64_Off e_phoff;
-    Elf64_Off e_shoff;
-    Elf_Word e_flags;
-    Elf_Half e_ehsize;
-    Elf_Half e_phentsize;
-    Elf_Half e_phnum;
-    Elf_Half e_shentsize;
-    Elf_Half e_shnum;
-    Elf_Half e_shstrndx;
-};
-
-
-// Section header
-struct Elf32_Shdr {
-    Elf_Word sh_name;
-    Elf_Word sh_type;
-    Elf_Word sh_flags;
-    Elf32_Addr sh_addr;
-    Elf32_Off sh_offset;
-    Elf_Word sh_size;
-    Elf_Word sh_link;
-    Elf_Word sh_info;
-    Elf_Word sh_addralign;
-    Elf_Word sh_entsize;
-};
-
-struct Elf64_Shdr {
-    Elf_Word sh_name;
-    Elf_Word sh_type;
-    Elf_Xword sh_flags;
-    Elf64_Addr sh_addr;
-    Elf64_Off sh_offset;
-    Elf_Xword sh_size;
-    Elf_Word sh_link;
-    Elf_Word sh_info;
-    Elf_Xword sh_addralign;
-    Elf_Xword sh_entsize;
-};
-
-
-// Segment header
-struct Elf32_Phdr {
-    Elf_Word p_type;
-    Elf32_Off p_offset;
-    Elf32_Addr p_vaddr;
-    Elf32_Addr p_paddr;
-    Elf_Word p_filesz;
-    Elf_Word p_memsz;
-    Elf_Word p_flags;
-    Elf_Word p_align;
-};
-
-struct Elf64_Phdr {
-    Elf_Word p_type;
-    Elf_Word p_flags;
-    Elf64_Off p_offset;
-    Elf64_Addr p_vaddr;
-    Elf64_Addr p_paddr;
-    Elf_Xword p_filesz;
-    Elf_Xword p_memsz;
-    Elf_Xword p_align;
-};
-
-
-// Symbol table entry
-struct Elf32_Sym {
-    Elf_Word st_name;
-    Elf32_Addr st_value;
-    Elf_Word st_size;
-    unsigned char st_info;
-    unsigned char st_other;
-    Elf_Half st_shndx;
-};
-
-struct Elf64_Sym {
-    Elf_Word st_name;
-    unsigned char st_info;
-    unsigned char st_other;
-    Elf_Half st_shndx;
-    Elf64_Addr st_value;
-    Elf_Xword st_size;
-};
-
-
-#define ELF_ST_BIND(i) ((i) >> 4)
-#define ELF_ST_TYPE(i) ((i)&0xf)
-#define ELF_ST_INFO(b, t) (((b) << 4) + ((t)&0xf))
-
-#define ELF_ST_VISIBILITY(o) ((o)&0x3)
-
-
-// Relocation entries
-struct Elf32_Rel {
-    Elf32_Addr r_offset;
-    Elf_Word r_info;
-};
-
-struct Elf32_Rela {
-    Elf32_Addr r_offset;
-    Elf_Word r_info;
-    Elf_Sword r_addend;
-};
-
-struct Elf64_Rel {
-    Elf64_Addr r_offset;
-    Elf_Xword r_info;
-};
-
-struct Elf64_Rela {
-    Elf64_Addr r_offset;
-    Elf_Xword r_info;
-    Elf_Sxword r_addend;
-};
-
-
-#define ELF32_R_SYM(i) ((i) >> 8)
-#define ELF32_R_TYPE(i) ((unsigned char)(i))
-#define ELF32_R_INFO(s, t) (((s) << 8) + (unsigned char)(t))
-
-#define ELF64_R_SYM(i) ((i) >> 32)
-#define ELF64_R_TYPE(i) ((i)&0xffffffffL)
-#define ELF64_R_INFO(s, t) ((((int64_t)s) << 32) + ((t)&0xffffffffL))
-
-// Dynamic structure
-struct Elf32_Dyn {
-    Elf_Sword d_tag;
-    union {
-        Elf_Word d_val;
-        Elf32_Addr d_ptr;
-    } d_un;
-};
-
-struct Elf64_Dyn {
-    Elf_Sxword d_tag;
-    union {
-        Elf_Xword d_val;
-        Elf64_Addr d_ptr;
-    } d_un;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFTYPES_H
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio.hpp
deleted file mode 100644
index 6bc0418..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio.hpp
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_HPP
-#define ELFIO_HPP
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#pragma warning(disable : 4355)
-#pragma warning(disable : 4244)
-#endif
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-#include <vector>
-#include <deque>
-#include <iterator>
-#include <typeinfo>
-
-#include "elf_types.hpp"
-#include "elfio_utils.hpp"
-#include "elfio_header.hpp"
-#include "elfio_section.hpp"
-#include "elfio_segment.hpp"
-#include "elfio_strings.hpp"
-
-#define ELFIO_HEADER_ACCESS_GET(TYPE, FNAME)                                                       \
-    TYPE get_##FNAME() const { return header->get_##FNAME(); }
-
-#define ELFIO_HEADER_ACCESS_GET_SET(TYPE, FNAME)                                                   \
-    TYPE get_##FNAME() const { return header->get_##FNAME(); }                                     \
-    void set_##FNAME(TYPE val) { header->set_##FNAME(val); }
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class elfio {
-   public:
-    //------------------------------------------------------------------------------
-    elfio() : sections(this), segments(this) {
-        header = 0;
-        current_file_pos = 0;
-        create(ELFCLASS32, ELFDATA2LSB);
-    }
-
-    //------------------------------------------------------------------------------
-    ~elfio() { clean(); }
-
-    //------------------------------------------------------------------------------
-    void create(unsigned char file_class, unsigned char encoding) {
-        clean();
-        convertor.setup(encoding);
-        header = create_header(file_class, encoding);
-        create_mandatory_sections();
-    }
-
-    //------------------------------------------------------------------------------
-    bool load(const std::string& file_name) {
-        std::ifstream stream;
-        stream.open(file_name.c_str(), std::ios::in | std::ios::binary);
-        if (!stream) {
-            return false;
-        }
-
-        return load(stream);
-    }
-
-    //------------------------------------------------------------------------------
-    bool load(std::istream& stream) {
-        clean();
-
-        unsigned char e_ident[EI_NIDENT];
-
-        // Read ELF file signature
-        stream.seekg(0);
-        stream.read(reinterpret_cast<char*>(&e_ident), sizeof(e_ident));
-
-        // Is it ELF file?
-        if (stream.gcount() != sizeof(e_ident) || e_ident[EI_MAG0] != ELFMAG0 ||
-            e_ident[EI_MAG1] != ELFMAG1 || e_ident[EI_MAG2] != ELFMAG2 ||
-            e_ident[EI_MAG3] != ELFMAG3) {
-            return false;
-        }
-
-        if ((e_ident[EI_CLASS] != ELFCLASS64) && (e_ident[EI_CLASS] != ELFCLASS32)) {
-            return false;
-        }
-
-        convertor.setup(e_ident[EI_DATA]);
-
-        header = create_header(e_ident[EI_CLASS], e_ident[EI_DATA]);
-        if (0 == header) {
-            return false;
-        }
-        if (!header->load(stream)) {
-            return false;
-        }
-
-        load_sections(stream);
-        load_segments(stream);
-
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool save(const std::string& file_name) {
-        std::ofstream f(file_name.c_str(), std::ios::out | std::ios::binary);
-
-        if (!f) {
-            return false;
-        }
-
-        bool is_still_good = true;
-
-        // Define layout specific header fields
-        // The position of the segment table is fixed after the header.
-        // The position of the section table is variable and needs to be fixed
-        // before saving.
-        header->set_segments_num(segments.size());
-        header->set_segments_offset(segments.size() ? header->get_header_size() : 0);
-        header->set_sections_num(sections.size());
-        header->set_sections_offset(0);
-
-        // Layout the first section right after the segment table
-        current_file_pos = header->get_header_size() +
-                           header->get_segment_entry_size() * header->get_segments_num();
-
-        is_still_good = layout_segments_and_their_sections();
-        is_still_good = is_still_good && layout_sections_without_segments();
-        is_still_good = is_still_good && layout_section_table();
-
-        is_still_good = is_still_good && save_header(f);
-        is_still_good = is_still_good && save_sections(f);
-        is_still_good = is_still_good && save_segments(f);
-
-        f.close();
-
-        return is_still_good;
-    }
-
-    //------------------------------------------------------------------------------
-    // ELF header access functions
-    ELFIO_HEADER_ACCESS_GET(unsigned char, class);
-    ELFIO_HEADER_ACCESS_GET(unsigned char, elf_version);
-    ELFIO_HEADER_ACCESS_GET(unsigned char, encoding);
-    ELFIO_HEADER_ACCESS_GET(Elf_Word, version);
-    ELFIO_HEADER_ACCESS_GET(Elf_Half, header_size);
-    ELFIO_HEADER_ACCESS_GET(Elf_Half, section_entry_size);
-    ELFIO_HEADER_ACCESS_GET(Elf_Half, segment_entry_size);
-
-    ELFIO_HEADER_ACCESS_GET_SET(unsigned char, os_abi);
-    ELFIO_HEADER_ACCESS_GET_SET(unsigned char, abi_version);
-    ELFIO_HEADER_ACCESS_GET_SET(Elf_Half, type);
-    ELFIO_HEADER_ACCESS_GET_SET(Elf_Half, machine);
-    ELFIO_HEADER_ACCESS_GET_SET(Elf_Word, flags);
-    ELFIO_HEADER_ACCESS_GET_SET(Elf64_Addr, entry);
-    ELFIO_HEADER_ACCESS_GET_SET(Elf64_Off, sections_offset);
-    ELFIO_HEADER_ACCESS_GET_SET(Elf64_Off, segments_offset);
-    ELFIO_HEADER_ACCESS_GET_SET(Elf_Half, section_name_str_index);
-
-    //------------------------------------------------------------------------------
-    const endianess_convertor& get_convertor() const { return convertor; }
-
-    //------------------------------------------------------------------------------
-    Elf_Xword get_default_entry_size(Elf_Word section_type) const {
-        switch (section_type) {
-            case SHT_RELA:
-                if (header->get_class() == ELFCLASS64) {
-                    return sizeof(Elf64_Rela);
-                } else {
-                    return sizeof(Elf32_Rela);
-                }
-            case SHT_REL:
-                if (header->get_class() == ELFCLASS64) {
-                    return sizeof(Elf64_Rel);
-                } else {
-                    return sizeof(Elf32_Rel);
-                }
-            case SHT_SYMTAB:
-                if (header->get_class() == ELFCLASS64) {
-                    return sizeof(Elf64_Sym);
-                } else {
-                    return sizeof(Elf32_Sym);
-                }
-            case SHT_DYNAMIC:
-                if (header->get_class() == ELFCLASS64) {
-                    return sizeof(Elf64_Dyn);
-                } else {
-                    return sizeof(Elf32_Dyn);
-                }
-            default:
-                return 0;
-        }
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    //------------------------------------------------------------------------------
-    void clean() {
-        delete header;
-        header = 0;
-
-        std::vector<section*>::const_iterator it;
-        for (it = sections_.begin(); it != sections_.end(); ++it) {
-            delete *it;
-        }
-        sections_.clear();
-
-        std::vector<segment*>::const_iterator it1;
-        for (it1 = segments_.begin(); it1 != segments_.end(); ++it1) {
-            delete *it1;
-        }
-        segments_.clear();
-    }
-
-    //------------------------------------------------------------------------------
-    elf_header* create_header(unsigned char file_class, unsigned char encoding) {
-        elf_header* new_header = 0;
-
-        if (file_class == ELFCLASS64) {
-            new_header = new elf_header_impl<Elf64_Ehdr>(&convertor, encoding);
-        } else if (file_class == ELFCLASS32) {
-            new_header = new elf_header_impl<Elf32_Ehdr>(&convertor, encoding);
-        } else {
-            return 0;
-        }
-
-        return new_header;
-    }
-
-    //------------------------------------------------------------------------------
-    section* create_section() {
-        section* new_section;
-        unsigned char file_class = get_class();
-
-        if (file_class == ELFCLASS64) {
-            new_section = new section_impl<Elf64_Shdr>(&convertor);
-        } else if (file_class == ELFCLASS32) {
-            new_section = new section_impl<Elf32_Shdr>(&convertor);
-        } else {
-            return 0;
-        }
-
-        new_section->set_index((Elf_Half)sections_.size());
-        sections_.push_back(new_section);
-
-        return new_section;
-    }
-
-
-    //------------------------------------------------------------------------------
-    segment* create_segment() {
-        segment* new_segment;
-        unsigned char file_class = header->get_class();
-
-        if (file_class == ELFCLASS64) {
-            new_segment = new segment_impl<Elf64_Phdr>(&convertor);
-        } else if (file_class == ELFCLASS32) {
-            new_segment = new segment_impl<Elf32_Phdr>(&convertor);
-        } else {
-            return 0;
-        }
-
-        new_segment->set_index((Elf_Half)segments_.size());
-        segments_.push_back(new_segment);
-
-        return new_segment;
-    }
-
-    //------------------------------------------------------------------------------
-    void create_mandatory_sections() {
-        // Create null section without calling to 'add_section' as no string
-        // section containing section names exists yet
-        section* sec0 = create_section();
-        sec0->set_index(0);
-        sec0->set_name("");
-        sec0->set_name_string_offset(0);
-
-        set_section_name_str_index(1);
-        section* shstrtab = sections.add(".shstrtab");
-        shstrtab->set_type(SHT_STRTAB);
-        shstrtab->set_addr_align(1);
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Half load_sections(std::istream& stream) {
-        Elf_Half entry_size = header->get_section_entry_size();
-        Elf_Half num = header->get_sections_num();
-        Elf64_Off offset = header->get_sections_offset();
-
-        for (Elf_Half i = 0; i < num; ++i) {
-            section* sec = create_section();
-            sec->load(stream, (std::streamoff)offset + i * entry_size);
-            sec->set_index(i);
-            // To mark that the section is not permitted to reassign address
-            // during layout calculation
-            sec->set_address(sec->get_address());
-        }
-
-        Elf_Half shstrndx = get_section_name_str_index();
-
-        if (SHN_UNDEF != shstrndx) {
-            string_section_accessor str_reader(sections[shstrndx]);
-            for (Elf_Half i = 0; i < num; ++i) {
-                Elf_Word offset = sections[i]->get_name_string_offset();
-                const char* p = str_reader.get_string(offset);
-                if (p != 0) {
-                    sections[i]->set_name(p);
-                }
-            }
-        }
-
-        return num;
-    }
-
-    //------------------------------------------------------------------------------
-    bool load_segments(std::istream& stream) {
-        Elf_Half entry_size = header->get_segment_entry_size();
-        Elf_Half num = header->get_segments_num();
-        Elf64_Off offset = header->get_segments_offset();
-
-        for (Elf_Half i = 0; i < num; ++i) {
-            segment* seg;
-            unsigned char file_class = header->get_class();
-
-            if (file_class == ELFCLASS64) {
-                seg = new segment_impl<Elf64_Phdr>(&convertor);
-            } else if (file_class == ELFCLASS32) {
-                seg = new segment_impl<Elf32_Phdr>(&convertor);
-            } else {
-                return false;
-            }
-
-            seg->load(stream, (std::streamoff)offset + i * entry_size);
-            seg->set_index(i);
-
-            // Add sections to the segments (similar to readelfs algorithm)
-            Elf64_Off segBaseOffset = seg->get_offset();
-            Elf64_Off segEndOffset = segBaseOffset + seg->get_file_size();
-            Elf64_Off segVBaseAddr = seg->get_virtual_address();
-            Elf64_Off segVEndAddr = segVBaseAddr + seg->get_memory_size();
-            for (Elf_Half j = 0; j < sections.size(); ++j) {
-                const section* psec = sections[j];
-
-                // SHF_ALLOC sections are matched based on the virtual address
-                // otherwise the file offset is matched
-                if (psec->get_flags() & SHF_ALLOC
-                        ? (segVBaseAddr <= psec->get_address() &&
-                           psec->get_address() + psec->get_size() <= segVEndAddr)
-                        : (segBaseOffset <= psec->get_offset() &&
-                           psec->get_offset() + psec->get_size() <= segEndOffset)) {
-                    seg->add_section_index(psec->get_index(), psec->get_addr_align());
-                }
-            }
-
-            // Add section into the segments' container
-            segments_.push_back(seg);
-        }
-
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool save_header(std::ofstream& f) { return header->save(f); }
-
-    //------------------------------------------------------------------------------
-    bool save_sections(std::ofstream& f) {
-        for (unsigned int i = 0; i < sections_.size(); ++i) {
-            section* sec = sections_.at(i);
-
-            std::streampos headerPosition = (std::streamoff)header->get_sections_offset() +
-                                            header->get_section_entry_size() * sec->get_index();
-
-            sec->save(f, headerPosition, sec->get_offset());
-        }
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool save_segments(std::ofstream& f) {
-        for (unsigned int i = 0; i < segments_.size(); ++i) {
-            segment* seg = segments_.at(i);
-
-            std::streampos headerPosition =
-                header->get_segments_offset() + header->get_segment_entry_size() * seg->get_index();
-
-            seg->save(f, headerPosition, seg->get_offset());
-        }
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool is_section_without_segment(unsigned int section_index) {
-        bool found = false;
-
-        for (unsigned int j = 0; !found && (j < segments.size()); ++j) {
-            for (unsigned int k = 0; !found && (k < segments[j]->get_sections_num()); ++k) {
-                found = segments[j]->get_section_index_at(k) == section_index;
-            }
-        }
-
-        return !found;
-    }
-
-    //------------------------------------------------------------------------------
-    bool is_subsequence_of(segment* seg1, segment* seg2) {
-        // Return 'true' if sections of seg1 are a subset of sections in seg2
-        const std::vector<Elf_Half>& sections1 = seg1->get_sections();
-        const std::vector<Elf_Half>& sections2 = seg2->get_sections();
-
-        bool found = false;
-        if (sections1.size() < sections2.size()) {
-            found = std::includes(sections2.begin(), sections2.end(), sections1.begin(),
-                                  sections1.end());
-        }
-
-        return found;
-    }
-
-    //------------------------------------------------------------------------------
-    std::vector<segment*> get_ordered_segments() {
-        std::vector<segment*> res;
-        std::deque<segment*> worklist;
-
-        res.reserve(segments.size());
-        std::copy(segments_.begin(), segments_.end(), std::back_inserter(worklist));
-
-        // Bring the segments which start at address 0 to the front
-        size_t nextSlot = 0;
-        for (size_t i = 0; i < worklist.size(); ++i) {
-            if (i != nextSlot && worklist[i]->is_offset_initialized() &&
-                worklist[i]->get_offset() == 0) {
-                std::swap(worklist[i], worklist[nextSlot]);
-                ++nextSlot;
-            }
-        }
-
-        while (!worklist.empty()) {
-            segment* seg = worklist.front();
-            worklist.pop_front();
-
-            size_t i = 0;
-            for (; i < worklist.size(); ++i) {
-                if (is_subsequence_of(seg, worklist[i])) {
-                    break;
-                }
-            }
-
-            if (i < worklist.size())
-                worklist.push_back(seg);
-            else
-                res.push_back(seg);
-        }
-
-        return res;
-    }
-
-
-    //------------------------------------------------------------------------------
-    bool layout_sections_without_segments() {
-        for (unsigned int i = 0; i < sections_.size(); ++i) {
-            if (is_section_without_segment(i)) {
-                section* sec = sections_[i];
-
-                Elf_Xword section_align = sec->get_addr_align();
-                if (section_align > 1 && current_file_pos % section_align != 0) {
-                    current_file_pos += section_align - current_file_pos % section_align;
-                }
-
-                if (0 != sec->get_index()) sec->set_offset(current_file_pos);
-
-                if (SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type()) {
-                    current_file_pos += sec->get_size();
-                }
-            }
-        }
-
-        return true;
-    }
-
-
-    //------------------------------------------------------------------------------
-    bool layout_segments_and_their_sections() {
-        std::vector<segment*> worklist;
-        std::vector<bool> section_generated(sections.size(), false);
-
-        // Get segments in a order in where segments which contain a
-        // sub sequence of other segments are located at the end
-        worklist = get_ordered_segments();
-
-        for (unsigned int i = 0; i < worklist.size(); ++i) {
-            Elf_Xword segment_memory = 0;
-            Elf_Xword segment_filesize = 0;
-            Elf_Xword seg_start_pos = current_file_pos;
-            segment* seg = worklist[i];
-
-            // Special case: PHDR segment
-            // This segment contains the program headers but no sections
-            if (seg->get_type() == PT_PHDR && seg->get_sections_num() == 0) {
-                seg_start_pos = header->get_segments_offset();
-                segment_memory = segment_filesize =
-                    header->get_segment_entry_size() * header->get_segments_num();
-            }
-            // Special case:
-            // Segments which start with the NULL section and have further sections
-            else if (seg->get_sections_num() > 1 &&
-                     sections[seg->get_section_index_at(0)]->get_type() == SHT_NULL) {
-                seg_start_pos = 0;
-                if (seg->get_sections_num()) {
-                    segment_memory = segment_filesize = current_file_pos;
-                }
-            }
-            // New segments with not generated sections
-            // have to be aligned
-            else if (seg->get_sections_num() && !section_generated[seg->get_section_index_at(0)]) {
-                Elf64_Off cur_page_alignment = current_file_pos % seg->get_align();
-                Elf64_Off req_page_alignment = seg->get_virtual_address() % seg->get_align();
-                Elf64_Off error = req_page_alignment - cur_page_alignment;
-
-                current_file_pos += (seg->get_align() + error) % seg->get_align();
-                seg_start_pos = current_file_pos;
-            } else if (seg->get_sections_num()) {
-                seg_start_pos = sections[seg->get_section_index_at(0)]->get_offset();
-            }
-
-            // Write segment's data
-            for (unsigned int j = 0; j < seg->get_sections_num(); ++j) {
-                Elf_Half index = seg->get_section_index_at(j);
-
-                section* sec = sections[index];
-
-                // The NULL section is always generated
-                if (SHT_NULL == sec->get_type()) {
-                    section_generated[index] = true;
-                    continue;
-                }
-
-                Elf_Xword secAlign = 0;
-                // Fix up the alignment
-                if (!section_generated[index] && sec->is_address_initialized() &&
-                    SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type()) {
-                    // Align the sections based on the virtual addresses
-                    // when possible (this is what matters for execution)
-                    Elf64_Off req_offset = sec->get_address() - seg->get_virtual_address();
-                    Elf64_Off cur_offset = current_file_pos - seg_start_pos;
-                    secAlign = req_offset - cur_offset;
-                } else if (!section_generated[index]) {
-                    // If no address has been specified then only the section
-                    // alignment constraint has to be matched
-                    Elf_Xword align = sec->get_addr_align();
-                    if (align == 0) {
-                        align = 1;
-                    }
-                    Elf64_Off error = current_file_pos % align;
-                    secAlign = (align - error) % align;
-                } else {
-                    // Alignment for already generated sections
-                    secAlign = sec->get_offset() - seg_start_pos - segment_filesize;
-                }
-
-                // Determine the segment file and memory sizes
-                // Special case .tbss section (NOBITS) in non TLS segment
-                if ((sec->get_flags() & SHF_ALLOC) &&
-                    !((sec->get_flags() & SHF_TLS) && (seg->get_type() != PT_TLS) &&
-                      (SHT_NOBITS == sec->get_type())))
-                    segment_memory += sec->get_size() + secAlign;
-                if (SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type())
-                    segment_filesize += sec->get_size() + secAlign;
-
-                // Nothing to be done when generating nested segments
-                if (section_generated[index]) {
-                    continue;
-                }
-
-                current_file_pos += secAlign;
-
-                // Set the section addresses when missing
-                if (!sec->is_address_initialized())
-                    sec->set_address(seg->get_virtual_address() + current_file_pos - seg_start_pos);
-
-                if (0 != sec->get_index()) sec->set_offset(current_file_pos);
-
-                if (SHT_NOBITS != sec->get_type() && SHT_NULL != sec->get_type())
-                    current_file_pos += sec->get_size();
-                section_generated[index] = true;
-            }
-
-            seg->set_file_size(segment_filesize);
-            seg->set_memory_size(segment_memory);
-            seg->set_offset(seg_start_pos);
-        }
-
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool layout_section_table() {
-        // Simply place the section table at the end for now
-        Elf64_Off alignmentError = current_file_pos % 4;
-        current_file_pos += (4 - alignmentError) % 4;
-        header->set_sections_offset(current_file_pos);
-        return true;
-    }
-
-
-    //------------------------------------------------------------------------------
-   public:
-    friend class Sections;
-    class Sections {
-       public:
-        //------------------------------------------------------------------------------
-        Sections(elfio* parent_) : parent(parent_) {}
-
-        //------------------------------------------------------------------------------
-        Elf_Half size() const { return (Elf_Half)parent->sections_.size(); }
-
-        //------------------------------------------------------------------------------
-        section* operator[](unsigned int index) const {
-            section* sec = 0;
-
-            if (index < parent->sections_.size()) {
-                sec = parent->sections_[index];
-            }
-
-            return sec;
-        }
-
-        //------------------------------------------------------------------------------
-        section* operator[](const std::string& name) const {
-            section* sec = 0;
-
-            std::vector<section*>::const_iterator it;
-            for (it = parent->sections_.begin(); it != parent->sections_.end(); ++it) {
-                if ((*it)->get_name() == name) {
-                    sec = *it;
-                    break;
-                }
-            }
-
-            return sec;
-        }
-
-        //------------------------------------------------------------------------------
-        section* add(const std::string& name) {
-            section* new_section = parent->create_section();
-            new_section->set_name(name);
-
-            Elf_Half str_index = parent->get_section_name_str_index();
-            section* string_table(parent->sections_[str_index]);
-            string_section_accessor str_writer(string_table);
-            Elf_Word pos = str_writer.add_string(name);
-            new_section->set_name_string_offset(pos);
-
-            return new_section;
-        }
-
-        //------------------------------------------------------------------------------
-        std::vector<section*>::iterator begin() { return parent->sections_.begin(); }
-
-        //------------------------------------------------------------------------------
-        std::vector<section*>::iterator end() { return parent->sections_.end(); }
-
-        //------------------------------------------------------------------------------
-       private:
-        elfio* parent;
-    } sections;
-
-    //------------------------------------------------------------------------------
-   public:
-    friend class Segments;
-    class Segments {
-       public:
-        //------------------------------------------------------------------------------
-        Segments(elfio* parent_) : parent(parent_) {}
-
-        //------------------------------------------------------------------------------
-        Elf_Half size() const { return (Elf_Half)parent->segments_.size(); }
-
-        //------------------------------------------------------------------------------
-        segment* operator[](unsigned int index) const { return parent->segments_[index]; }
-
-
-        //------------------------------------------------------------------------------
-        segment* add() { return parent->create_segment(); }
-
-        //------------------------------------------------------------------------------
-        std::vector<segment*>::iterator begin() { return parent->segments_.begin(); }
-
-        //------------------------------------------------------------------------------
-        std::vector<segment*>::iterator end() { return parent->segments_.end(); }
-
-        //------------------------------------------------------------------------------
-       private:
-        elfio* parent;
-    } segments;
-
-    //------------------------------------------------------------------------------
-   private:
-    elf_header* header;
-    std::vector<section*> sections_;
-    std::vector<segment*> segments_;
-    endianess_convertor convertor;
-
-    Elf_Xword current_file_pos;
-};
-
-}  // namespace ELFIO
-
-#include "elfio_symbols.hpp"
-#include "elfio_note.hpp"
-#include "elfio_relocation.hpp"
-#include "elfio_dynamic.hpp"
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif  // ELFIO_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dump.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dump.hpp
deleted file mode 100644
index c40a010..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dump.hpp
+++ /dev/null
@@ -1,825 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_DUMP_HPP
-#define ELFIO_DUMP_HPP
-
-#include <algorithm>
-#include <string>
-#include <ostream>
-#include <sstream>
-#include <iomanip>
-#include "elfio.hpp"
-
-namespace ELFIO {
-
-
-static struct class_table_t {
-    const char key;
-    const char* str;
-} class_table[] = {
-    {ELFCLASS32, "ELF32"},
-    {ELFCLASS64, "ELF64"},
-};
-
-
-static struct endian_table_t {
-    const char key;
-    const char* str;
-} endian_table[] = {
-    {ELFDATANONE, "None"},
-    {ELFDATA2LSB, "Little endian"},
-    {ELFDATA2MSB, "Big endian"},
-};
-
-
-static struct version_table_t {
-    const Elf64_Word key;
-    const char* str;
-} version_table[] = {
-    {EV_NONE, "None"},
-    {EV_CURRENT, "Current"},
-};
-
-
-static struct type_table_t {
-    const Elf32_Half key;
-    const char* str;
-} type_table[] = {
-    {ET_NONE, "No file type"},      {ET_REL, "Relocatable file"}, {ET_EXEC, "Executable file"},
-    {ET_DYN, "Shared object file"}, {ET_CORE, "Core file"},
-};
-
-
-static struct machine_table_t {
-    const Elf64_Half key;
-    const char* str;
-} machine_table[] = {
-    {EM_NONE, "No machine"},
-    {EM_M32, "AT&T WE 32100"},
-    {EM_SPARC, "SUN SPARC"},
-    {EM_386, "Intel 80386"},
-    {EM_68K, "Motorola m68k family"},
-    {EM_88K, "Motorola m88k family"},
-    {EM_486, "Intel 80486// Reserved for future use"},
-    {EM_860, "Intel 80860"},
-    {EM_MIPS, "MIPS R3000 (officially, big-endian only)"},
-    {EM_S370, "IBM System/370"},
-    {EM_MIPS_RS3_LE, "MIPS R3000 little-endian (Oct 4 1999 Draft) Deprecated"},
-    {EM_res011, "Reserved"},
-    {EM_res012, "Reserved"},
-    {EM_res013, "Reserved"},
-    {EM_res014, "Reserved"},
-    {EM_PARISC, "HPPA"},
-    {EM_res016, "Reserved"},
-    {EM_VPP550, "Fujitsu VPP500"},
-    {EM_SPARC32PLUS, "Sun's v8plus"},
-    {EM_960, "Intel 80960"},
-    {EM_PPC, "PowerPC"},
-    {EM_PPC64, "64-bit PowerPC"},
-    {EM_S390, "IBM S/390"},
-    {EM_SPU, "Sony/Toshiba/IBM SPU"},
-    {EM_res024, "Reserved"},
-    {EM_res025, "Reserved"},
-    {EM_res026, "Reserved"},
-    {EM_res027, "Reserved"},
-    {EM_res028, "Reserved"},
-    {EM_res029, "Reserved"},
-    {EM_res030, "Reserved"},
-    {EM_res031, "Reserved"},
-    {EM_res032, "Reserved"},
-    {EM_res033, "Reserved"},
-    {EM_res034, "Reserved"},
-    {EM_res035, "Reserved"},
-    {EM_V800, "NEC V800 series"},
-    {EM_FR20, "Fujitsu FR20"},
-    {EM_RH32, "TRW RH32"},
-    {EM_MCORE, "Motorola M*Core // May also be taken by Fujitsu MMA"},
-    {EM_RCE, "Old name for MCore"},
-    {EM_ARM, "ARM"},
-    {EM_OLD_ALPHA, "Digital Alpha"},
-    {EM_SH, "Renesas (formerly Hitachi) / SuperH SH"},
-    {EM_SPARCV9, "SPARC v9 64-bit"},
-    {EM_TRICORE, "Siemens Tricore embedded processor"},
-    {EM_ARC, "ARC Cores"},
-    {EM_H8_300, "Renesas (formerly Hitachi) H8/300"},
-    {EM_H8_300H, "Renesas (formerly Hitachi) H8/300H"},
-    {EM_H8S, "Renesas (formerly Hitachi) H8S"},
-    {EM_H8_500, "Renesas (formerly Hitachi) H8/500"},
-    {EM_IA_64, "Intel IA-64 Processor"},
-    {EM_MIPS_X, "Stanford MIPS-X"},
-    {EM_COLDFIRE, "Motorola Coldfire"},
-    {EM_68HC12, "Motorola M68HC12"},
-    {EM_MMA, "Fujitsu Multimedia Accelerator"},
-    {EM_PCP, "Siemens PCP"},
-    {EM_NCPU, "Sony nCPU embedded RISC processor"},
-    {EM_NDR1, "Denso NDR1 microprocesspr"},
-    {EM_STARCORE, "Motorola Star*Core processor"},
-    {EM_ME16, "Toyota ME16 processor"},
-    {EM_ST100, "STMicroelectronics ST100 processor"},
-    {EM_TINYJ, "Advanced Logic Corp. TinyJ embedded processor"},
-    {EM_X86_64, "Advanced Micro Devices X86-64 processor"},
-    {EM_PDSP, "Sony DSP Processor"},
-    {EM_PDP10, "Digital Equipment Corp. PDP-10"},
-    {EM_PDP11, "Digital Equipment Corp. PDP-11"},
-    {EM_FX66, "Siemens FX66 microcontroller"},
-    {EM_ST9PLUS, "STMicroelectronics ST9+ 8/16 bit microcontroller"},
-    {EM_ST7, "STMicroelectronics ST7 8-bit microcontroller"},
-    {EM_68HC16, "Motorola MC68HC16 Microcontroller"},
-    {EM_68HC11, "Motorola MC68HC11 Microcontroller"},
-    {EM_68HC08, "Motorola MC68HC08 Microcontroller"},
-    {EM_68HC05, "Motorola MC68HC05 Microcontroller"},
-    {EM_SVX, "Silicon Graphics SVx"},
-    {EM_ST19, "STMicroelectronics ST19 8-bit cpu"},
-    {EM_VAX, "Digital VAX"},
-    {EM_CRIS, "Axis Communications 32-bit embedded processor"},
-    {EM_JAVELIN, "Infineon Technologies 32-bit embedded cpu"},
-    {EM_FIREPATH, "Element 14 64-bit DSP processor"},
-    {EM_ZSP, "LSI Logic's 16-bit DSP processor"},
-    {EM_MMIX, "Donald Knuth's educational 64-bit processor"},
-    {EM_HUANY, "Harvard's machine-independent format"},
-    {EM_PRISM, "SiTera Prism"},
-    {EM_AVR, "Atmel AVR 8-bit microcontroller"},
-    {EM_FR30, "Fujitsu FR30"},
-    {EM_D10V, "Mitsubishi D10V"},
-    {EM_D30V, "Mitsubishi D30V"},
-    {EM_V850, "NEC v850"},
-    {EM_M32R, "Renesas M32R (formerly Mitsubishi M32R)"},
-    {EM_MN10300, "Matsushita MN10300"},
-    {EM_MN10200, "Matsushita MN10200"},
-    {EM_PJ, "picoJava"},
-    {EM_OPENRISC, "OpenRISC 32-bit embedded processor"},
-    {EM_ARC_A5, "ARC Cores Tangent-A5"},
-    {EM_XTENSA, "Tensilica Xtensa Architecture"},
-    {EM_VIDEOCORE, "Alphamosaic VideoCore processor"},
-    {EM_TMM_GPP, "Thompson Multimedia General Purpose Processor"},
-    {EM_NS32K, "National Semiconductor 32000 series"},
-    {EM_TPC, "Tenor Network TPC processor"},
-    {EM_SNP1K, "Trebia SNP 1000 processor"},
-    {EM_ST200, "STMicroelectronics ST200 microcontroller"},
-    {EM_IP2K, "Ubicom IP2022 micro controller"},
-    {EM_MAX, "MAX Processor"},
-    {EM_CR, "National Semiconductor CompactRISC"},
-    {EM_F2MC16, "Fujitsu F2MC16"},
-    {EM_MSP430, "TI msp430 micro controller"},
-    {EM_BLACKFIN, "ADI Blackfin"},
-    {EM_SE_C33, "S1C33 Family of Seiko Epson processors"},
-    {EM_SEP, "Sharp embedded microprocessor"},
-    {EM_ARCA, "Arca RISC Microprocessor"},
-    {EM_UNICORE, "Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University"},
-    {EM_EXCESS, "eXcess: 16/32/64-bit configurable embedded CPU"},
-    {EM_DXP, "Icera Semiconductor Inc. Deep Execution Processor"},
-    {EM_ALTERA_NIOS2, "Altera Nios II soft-core processor"},
-    {EM_CRX, "National Semiconductor CRX"},
-    {EM_XGATE, "Motorola XGATE embedded processor"},
-    {EM_C166, "Infineon C16x/XC16x processor"},
-    {EM_M16C, "Renesas M16C series microprocessors"},
-    {EM_DSPIC30F, "Microchip Technology dsPIC30F Digital Signal Controller"},
-    {EM_CE, "Freescale Communication Engine RISC core"},
-    {EM_M32C, "Renesas M32C series microprocessors"},
-    {EM_res121, "Reserved"},
-    {EM_res122, "Reserved"},
-    {EM_res123, "Reserved"},
-    {EM_res124, "Reserved"},
-    {EM_res125, "Reserved"},
-    {EM_res126, "Reserved"},
-    {EM_res127, "Reserved"},
-    {EM_res128, "Reserved"},
-    {EM_res129, "Reserved"},
-    {EM_res130, "Reserved"},
-    {EM_TSK3000, "Altium TSK3000 core"},
-    {EM_RS08, "Freescale RS08 embedded processor"},
-    {EM_res133, "Reserved"},
-    {EM_ECOG2, "Cyan Technology eCOG2 microprocessor"},
-    {EM_SCORE, "Sunplus Score"},
-    {EM_SCORE7, "Sunplus S+core7 RISC processor"},
-    {EM_DSP24, "New Japan Radio (NJR) 24-bit DSP Processor"},
-    {EM_VIDEOCORE3, "Broadcom VideoCore III processor"},
-    {EM_LATTICEMICO32, "RISC processor for Lattice FPGA architecture"},
-    {EM_SE_C17, "Seiko Epson C17 family"},
-    {EM_TI_C6000, "Texas Instruments TMS320C6000 DSP family"},
-    {EM_TI_C2000, "Texas Instruments TMS320C2000 DSP family"},
-    {EM_TI_C5500, "Texas Instruments TMS320C55x DSP family"},
-    {EM_res143, "Reserved"},
-    {EM_res144, "Reserved"},
-    {EM_res145, "Reserved"},
-    {EM_res146, "Reserved"},
-    {EM_res147, "Reserved"},
-    {EM_res148, "Reserved"},
-    {EM_res149, "Reserved"},
-    {EM_res150, "Reserved"},
-    {EM_res151, "Reserved"},
-    {EM_res152, "Reserved"},
-    {EM_res153, "Reserved"},
-    {EM_res154, "Reserved"},
-    {EM_res155, "Reserved"},
-    {EM_res156, "Reserved"},
-    {EM_res157, "Reserved"},
-    {EM_res158, "Reserved"},
-    {EM_res159, "Reserved"},
-    {EM_MMDSP_PLUS, "STMicroelectronics 64bit VLIW Data Signal Processor"},
-    {EM_CYPRESS_M8C, "Cypress M8C microprocessor"},
-    {EM_R32C, "Renesas R32C series microprocessors"},
-    {EM_TRIMEDIA, "NXP Semiconductors TriMedia architecture family"},
-    {EM_QDSP6, "QUALCOMM DSP6 Processor"},
-    {EM_8051, "Intel 8051 and variants"},
-    {EM_STXP7X, "STMicroelectronics STxP7x family"},
-    {EM_NDS32, "Andes Technology compact code size embedded RISC processor family"},
-    {EM_ECOG1, "Cyan Technology eCOG1X family"},
-    {EM_ECOG1X, "Cyan Technology eCOG1X family"},
-    {EM_MAXQ30, "Dallas Semiconductor MAXQ30 Core Micro-controllers"},
-    {EM_XIMO16, "New Japan Radio (NJR) 16-bit DSP Processor"},
-    {EM_MANIK, "M2000 Reconfigurable RISC Microprocessor"},
-    {EM_CRAYNV2, "Cray Inc. NV2 vector architecture"},
-    {EM_RX, "Renesas RX family"},
-    {EM_METAG, "Imagination Technologies META processor architecture"},
-    {EM_MCST_ELBRUS, "MCST Elbrus general purpose hardware architecture"},
-    {EM_ECOG16, "Cyan Technology eCOG16 family"},
-    {EM_CR16, "National Semiconductor CompactRISC 16-bit processor"},
-    {EM_ETPU, "Freescale Extended Time Processing Unit"},
-    {EM_SLE9X, "Infineon Technologies SLE9X core"},
-    {EM_L1OM, "Intel L1OM"},
-    {EM_INTEL181, "Reserved by Intel"},
-    {EM_INTEL182, "Reserved by Intel"},
-    {EM_res183, "Reserved by ARM"},
-    {EM_res184, "Reserved by ARM"},
-    {EM_AVR32, "Atmel Corporation 32-bit microprocessor family"},
-    {EM_STM8, "STMicroeletronics STM8 8-bit microcontroller"},
-    {EM_TILE64, "Tilera TILE64 multicore architecture family"},
-    {EM_TILEPRO, "Tilera TILEPro multicore architecture family"},
-    {EM_MICROBLAZE, "Xilinx MicroBlaze 32-bit RISC soft processor core"},
-    {EM_CUDA, "NVIDIA CUDA architecture "},
-};
-
-
-static struct section_type_table_t {
-    const Elf64_Half key;
-    const char* str;
-} section_type_table[] = {
-    {SHT_NULL, "NULL"},
-    {SHT_PROGBITS, "PROGBITS"},
-    {SHT_SYMTAB, "SYMTAB"},
-    {SHT_STRTAB, "STRTAB"},
-    {SHT_RELA, "RELA"},
-    {SHT_HASH, "HASH"},
-    {SHT_DYNAMIC, "DYNAMIC"},
-    {SHT_NOTE, "NOTE"},
-    {SHT_NOBITS, "NOBITS"},
-    {SHT_REL, "REL"},
-    {SHT_SHLIB, "SHLIB"},
-    {SHT_DYNSYM, "DYNSYM"},
-    {SHT_INIT_ARRAY, "INIT_ARRAY"},
-    {SHT_FINI_ARRAY, "FINI_ARRAY"},
-    {SHT_PREINIT_ARRAY, "PREINIT_ARRAY"},
-    {SHT_GROUP, "GROUP"},
-    {SHT_SYMTAB_SHNDX, "SYMTAB_SHNDX "},
-};
-
-
-static struct segment_type_table_t {
-    const Elf_Word key;
-    const char* str;
-} segment_type_table[] = {
-    {PT_NULL, "NULL"}, {PT_LOAD, "LOAD"},   {PT_DYNAMIC, "DYNAMIC"}, {PT_INTERP, "INTERP"},
-    {PT_NOTE, "NOTE"}, {PT_SHLIB, "SHLIB"}, {PT_PHDR, "PHDR"},       {PT_TLS, "TLS"},
-};
-
-
-static struct segment_flag_table_t {
-    const Elf_Word key;
-    const char* str;
-} segment_flag_table[] = {
-    {0, ""}, {1, "X"}, {2, "W"}, {3, "WX"}, {4, "R"}, {5, "RX"}, {6, "RW"}, {7, "RWX"},
-};
-
-
-static struct symbol_bind_t {
-    const Elf_Word key;
-    const char* str;
-} symbol_bind_table[] = {
-    {STB_LOCAL, "LOCAL"},   {STB_GLOBAL, "GLOBAL"}, {STB_WEAK, "WEAK"},
-    {STB_LOOS, "LOOS"},     {STB_HIOS, "HIOS"},     {STB_MULTIDEF, "MULTIDEF"},
-    {STB_LOPROC, "LOPROC"}, {STB_HIPROC, "HIPROC"},
-};
-
-
-static struct symbol_type_t {
-    const Elf_Word key;
-    const char* str;
-} symbol_type_table[] = {
-    {STT_NOTYPE, "NOTYPE"},   {STT_OBJECT, "OBJECT"}, {STT_FUNC, "FUNC"},
-    {STT_SECTION, "SECTION"}, {STT_FILE, "FILE"},     {STT_COMMON, "COMMON"},
-    {STT_TLS, "TLS"},         {STT_LOOS, "LOOS"},     {STT_HIOS, "HIOS"},
-    {STT_LOPROC, "LOPROC"},   {STT_HIPROC, "HIPROC"},
-};
-
-
-static struct dynamic_tag_t {
-    const Elf_Word key;
-    const char* str;
-} dynamic_tag_table[] = {
-    {DT_NULL, "NULL"},
-    {DT_NEEDED, "NEEDED"},
-    {DT_PLTRELSZ, "PLTRELSZ"},
-    {DT_PLTGOT, "PLTGOT"},
-    {DT_HASH, "HASH"},
-    {DT_STRTAB, "STRTAB"},
-    {DT_SYMTAB, "SYMTAB"},
-    {DT_RELA, "RELA"},
-    {DT_RELASZ, "RELASZ"},
-    {DT_RELAENT, "RELAENT"},
-    {DT_STRSZ, "STRSZ"},
-    {DT_SYMENT, "SYMENT"},
-    {DT_INIT, "INIT"},
-    {DT_FINI, "FINI"},
-    {DT_SONAME, "SONAME"},
-    {DT_RPATH, "RPATH"},
-    {DT_SYMBOLIC, "SYMBOLIC"},
-    {DT_REL, "REL"},
-    {DT_RELSZ, "RELSZ"},
-    {DT_RELENT, "RELENT"},
-    {DT_PLTREL, "PLTREL"},
-    {DT_DEBUG, "DEBUG"},
-    {DT_TEXTREL, "TEXTREL"},
-    {DT_JMPREL, "JMPREL"},
-    {DT_BIND_NOW, "BIND_NOW"},
-    {DT_INIT_ARRAY, "INIT_ARRAY"},
-    {DT_FINI_ARRAY, "FINI_ARRAY"},
-    {DT_INIT_ARRAYSZ, "INIT_ARRAYSZ"},
-    {DT_FINI_ARRAYSZ, "FINI_ARRAYSZ"},
-    {DT_RUNPATH, "RUNPATH"},
-    {DT_FLAGS, "FLAGS"},
-    {DT_ENCODING, "ENCODING"},
-    {DT_PREINIT_ARRAY, "PREINIT_ARRAY"},
-    {DT_PREINIT_ARRAYSZ, "PREINIT_ARRAYSZ"},
-    {DT_MAXPOSTAGS, "MAXPOSTAGS"},
-};
-
-static const ELFIO::Elf_Xword MAX_DATA_ENTRIES = 64;
-
-//------------------------------------------------------------------------------
-class dump {
-#define DUMP_DEC_FORMAT(width) std::setw(width) << std::setfill(' ') << std::dec << std::right
-#define DUMP_HEX_FORMAT(width) std::setw(width) << std::setfill('0') << std::hex << std::right
-#define DUMP_STR_FORMAT(width) std::setw(width) << std::setfill(' ') << std::hex << std::left
-
-   public:
-    //------------------------------------------------------------------------------
-    static void header(std::ostream& out, const elfio& reader) {
-        out << "ELF Header" << std::endl
-            << std::endl
-            << "  Class:      " << str_class(reader.get_class()) << std::endl
-            << "  Encoding:   " << str_endian(reader.get_encoding()) << std::endl
-            << "  ELFVersion: " << str_version(reader.get_elf_version()) << std::endl
-            << "  Type:       " << str_type(reader.get_type()) << std::endl
-            << "  Machine:    " << str_machine(reader.get_machine()) << std::endl
-            << "  Version:    " << str_version(reader.get_version()) << std::endl
-            << "  Entry:      "
-            << "0x" << std::hex << reader.get_entry() << std::endl
-            << "  Flags:      "
-            << "0x" << std::hex << reader.get_flags() << std::endl
-            << std::endl;
-    }
-
-    //------------------------------------------------------------------------------
-    static void section_headers(std::ostream& out, const elfio& reader) {
-        Elf_Half n = reader.sections.size();
-
-        if (n == 0) {
-            return;
-        }
-
-        out << "Section Headers:" << std::endl;
-        if (reader.get_class() == ELFCLASS32) {  // Output for 32-bit
-            out << "[  Nr ] Type              Addr     Size     ES Flg Lk Inf Al Name" << std::endl;
-        } else {  // Output for 64-bit
-            out << "[  Nr ] Type              Addr             Size             ES   Flg"
-                << std::endl
-                << "        Lk   Inf  Al      Name" << std::endl;
-        }
-
-        for (Elf_Half i = 0; i < n; ++i) {  // For all sections
-            section* sec = reader.sections[i];
-            section_header(out, i, sec, reader.get_class());
-        }
-
-        out << "Key to Flags: W (write), A (alloc), X (execute)\n\n" << std::endl;
-    }
-
-    //------------------------------------------------------------------------------
-    static void section_header(std::ostream& out, Elf_Half no, const section* sec,
-                               unsigned char elf_class) {
-        std::ios_base::fmtflags original_flags = out.flags();
-
-        if (elf_class == ELFCLASS32) {  // Output for 32-bit
-            out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(17)
-                << str_section_type(sec->get_type()) << " " << DUMP_HEX_FORMAT(8)
-                << sec->get_address() << " " << DUMP_HEX_FORMAT(8) << sec->get_size() << " "
-                << DUMP_HEX_FORMAT(2) << sec->get_entry_size() << " " << DUMP_STR_FORMAT(3)
-                << section_flags(sec->get_flags()) << " " << DUMP_HEX_FORMAT(2) << sec->get_link()
-                << " " << DUMP_HEX_FORMAT(3) << sec->get_info() << " " << DUMP_HEX_FORMAT(2)
-                << sec->get_addr_align() << " " << DUMP_STR_FORMAT(17) << sec->get_name() << " "
-                << std::endl;
-        } else {  // Output for 64-bit
-            out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(17)
-                << str_section_type(sec->get_type()) << " " << DUMP_HEX_FORMAT(16)
-                << sec->get_address() << " " << DUMP_HEX_FORMAT(16) << sec->get_size() << " "
-                << DUMP_HEX_FORMAT(4) << sec->get_entry_size() << " " << DUMP_STR_FORMAT(3)
-                << section_flags(sec->get_flags()) << " " << std::endl
-                << "        " << DUMP_HEX_FORMAT(4) << sec->get_link() << " " << DUMP_HEX_FORMAT(4)
-                << sec->get_info() << " " << DUMP_HEX_FORMAT(4) << sec->get_addr_align() << "    "
-                << DUMP_STR_FORMAT(17) << sec->get_name() << " " << std::endl;
-        }
-
-        out.flags(original_flags);
-
-        return;
-    }
-
-    //------------------------------------------------------------------------------
-    static void segment_headers(std::ostream& out, const elfio& reader) {
-        Elf_Half n = reader.segments.size();
-        if (n == 0) {
-            return;
-        }
-
-        out << "Segment headers:" << std::endl;
-        if (reader.get_class() == ELFCLASS32) {  // Output for 32-bit
-            out << "[  Nr ] Type           VirtAddr PhysAddr FileSize Mem.Size Flags    Align"
-                << std::endl;
-        } else {  // Output for 64-bit
-            out << "[  Nr ] Type           VirtAddr         PhysAddr         Flags" << std::endl
-                << "                       FileSize         Mem.Size         Align" << std::endl;
-        }
-
-        for (Elf_Half i = 0; i < n; ++i) {
-            segment* seg = reader.segments[i];
-            segment_header(out, i, seg, reader.get_class());
-        }
-
-        out << std::endl;
-    }
-
-    //------------------------------------------------------------------------------
-    static void segment_header(std::ostream& out, Elf_Half no, const segment* seg,
-                               unsigned int elf_class) {
-        std::ios_base::fmtflags original_flags = out.flags();
-
-        if (elf_class == ELFCLASS32) {  // Output for 32-bit
-            out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(14)
-                << str_segment_type(seg->get_type()) << " " << DUMP_HEX_FORMAT(8)
-                << seg->get_virtual_address() << " " << DUMP_HEX_FORMAT(8)
-                << seg->get_physical_address() << " " << DUMP_HEX_FORMAT(8) << seg->get_file_size()
-                << " " << DUMP_HEX_FORMAT(8) << seg->get_memory_size() << " " << DUMP_STR_FORMAT(8)
-                << str_segment_flag(seg->get_flags()) << " " << DUMP_HEX_FORMAT(8)
-                << seg->get_align() << " " << std::endl;
-        } else {  // Output for 64-bit
-            out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(14)
-                << str_segment_type(seg->get_type()) << " " << DUMP_HEX_FORMAT(16)
-                << seg->get_virtual_address() << " " << DUMP_HEX_FORMAT(16)
-                << seg->get_physical_address() << " " << DUMP_STR_FORMAT(16)
-                << str_segment_flag(seg->get_flags()) << " " << std::endl
-                << "                       " << DUMP_HEX_FORMAT(16) << seg->get_file_size() << " "
-                << DUMP_HEX_FORMAT(16) << seg->get_memory_size() << " " << DUMP_HEX_FORMAT(16)
-                << seg->get_align() << " " << std::endl;
-        }
-
-        out.flags(original_flags);
-    }
-
-    //------------------------------------------------------------------------------
-    static void symbol_tables(std::ostream& out, const elfio& reader) {
-        Elf_Half n = reader.sections.size();
-        for (Elf_Half i = 0; i < n; ++i) {  // For all sections
-            section* sec = reader.sections[i];
-            if (SHT_SYMTAB == sec->get_type() || SHT_DYNSYM == sec->get_type()) {
-                symbol_section_accessor symbols(reader, sec);
-
-                Elf_Xword sym_no = symbols.get_symbols_num();
-                if (sym_no > 0) {
-                    out << "Symbol table (" << sec->get_name() << ")" << std::endl;
-                    if (reader.get_class() == ELFCLASS32) {  // Output for 32-bit
-                        out << "[  Nr ] Value    Size     Type    Bind      Sect Name" << std::endl;
-                    } else {  // Output for 64-bit
-                        out << "[  Nr ] Value            Size             Type    Bind      Sect"
-                            << std::endl
-                            << "        Name" << std::endl;
-                    }
-                    for (Elf_Half i = 0; i < sym_no; ++i) {
-                        std::string name;
-                        Elf64_Addr value = 0;
-                        Elf_Xword size = 0;
-                        unsigned char bind = 0;
-                        unsigned char type = 0;
-                        Elf_Half section = 0;
-                        unsigned char other = 0;
-                        symbols.get_symbol(i, name, value, size, bind, type, section, other);
-                        symbol_table(out, i, name, value, size, bind, type, section,
-                                     reader.get_class());
-                    }
-
-                    out << std::endl;
-                }
-            }
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    static void symbol_table(std::ostream& out, Elf_Half no, std::string& name, Elf64_Addr value,
-                             Elf_Xword size, unsigned char bind, unsigned char type,
-                             Elf_Half section, unsigned int elf_class) {
-        std::ios_base::fmtflags original_flags = out.flags();
-
-        if (elf_class == ELFCLASS32) {  // Output for 32-bit
-            out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_HEX_FORMAT(8) << value << " "
-                << DUMP_HEX_FORMAT(8) << size << " " << DUMP_STR_FORMAT(7) << str_symbol_type(type)
-                << " " << DUMP_STR_FORMAT(8) << str_symbol_bind(bind) << " " << DUMP_DEC_FORMAT(5)
-                << section << " " << DUMP_STR_FORMAT(1) << name << " " << std::endl;
-        } else {  // Output for 64-bit
-            out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_HEX_FORMAT(16) << value << " "
-                << DUMP_HEX_FORMAT(16) << size << " " << DUMP_STR_FORMAT(7) << str_symbol_type(type)
-                << " " << DUMP_STR_FORMAT(8) << str_symbol_bind(bind) << " " << DUMP_DEC_FORMAT(5)
-                << section << " " << std::endl
-                << "        " << DUMP_STR_FORMAT(1) << name << " " << std::endl;
-        }
-
-        out.flags(original_flags);
-    }
-
-    //------------------------------------------------------------------------------
-    static void notes(std::ostream& out, const elfio& reader) {
-        Elf_Half no = reader.sections.size();
-        for (Elf_Half i = 0; i < no; ++i) {  // For all sections
-            section* sec = reader.sections[i];
-            if (SHT_NOTE == sec->get_type()) {  // Look at notes
-                note_section_accessor notes(reader, sec);
-                int no_notes = notes.get_notes_num();
-                if (no > 0) {
-                    out << "Note section (" << sec->get_name() << ")" << std::endl
-                        << "    No Type     Name" << std::endl;
-                    for (int j = 0; j < no_notes; ++j) {  // For all notes
-                        Elf_Word type;
-                        std::string name;
-                        void* desc;
-                        Elf_Word descsz;
-
-                        if (notes.get_note(j, type, name, desc, descsz)) {
-                            // 'name' usually contains \0 at the end. Try to fix it
-                            name = name.c_str();
-                            note(out, j, type, name);
-                        }
-                    }
-
-                    out << std::endl;
-                }
-            }
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    static void note(std::ostream& out, int no, Elf_Word type, const std::string& name) {
-        out << "  [" << DUMP_DEC_FORMAT(2) << no << "] " << DUMP_HEX_FORMAT(8) << type << " "
-            << DUMP_STR_FORMAT(1) << name << std::endl;
-    }
-
-    //------------------------------------------------------------------------------
-    static void dynamic_tags(std::ostream& out, const elfio& reader) {
-        Elf_Half n = reader.sections.size();
-        for (Elf_Half i = 0; i < n; ++i) {  // For all sections
-            section* sec = reader.sections[i];
-            if (SHT_DYNAMIC == sec->get_type()) {
-                dynamic_section_accessor dynamic(reader, sec);
-
-                Elf_Xword dyn_no = dynamic.get_entries_num();
-                if (dyn_no > 0) {
-                    out << "Dynamic section (" << sec->get_name() << ")" << std::endl;
-                    out << "[  Nr ] Tag              Name/Value" << std::endl;
-                    for (int i = 0; i < dyn_no; ++i) {
-                        Elf_Xword tag = 0;
-                        Elf_Xword value = 0;
-                        std::string str;
-                        dynamic.get_entry(i, tag, value, str);
-                        dynamic_tag(out, i, tag, value, str, reader.get_class());
-                        if (DT_NULL == tag) {
-                            break;
-                        }
-                    }
-
-                    out << std::endl;
-                }
-            }
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    static void dynamic_tag(std::ostream& out, int no, Elf_Xword tag, Elf_Xword value,
-                            std::string str, unsigned int /*elf_class*/) {
-        out << "[" << DUMP_DEC_FORMAT(5) << no << "] " << DUMP_STR_FORMAT(16)
-            << str_dynamic_tag(tag) << " ";
-        if (str.empty()) {
-            out << DUMP_HEX_FORMAT(16) << value << " ";
-        } else {
-            out << DUMP_STR_FORMAT(32) << str << " ";
-        }
-        out << std::endl;
-    }
-
-    //------------------------------------------------------------------------------
-    static void section_data(std::ostream& out, const section* sec) {
-        std::ios_base::fmtflags original_flags = out.flags();
-
-        out << sec->get_name() << std::endl;
-        const char* pdata = sec->get_data();
-        if (pdata) {
-            ELFIO::Elf_Xword i;
-            for (i = 0; i < std::min(sec->get_size(), MAX_DATA_ENTRIES); ++i) {
-                if (i % 16 == 0) {
-                    out << "[" << DUMP_HEX_FORMAT(8) << i << "]";
-                }
-
-                out << " " << DUMP_HEX_FORMAT(2) << (pdata[i] & 0x000000FF);
-
-                if (i % 16 == 15) {
-                    out << std::endl;
-                }
-            }
-            if (i % 16 != 0) {
-                out << std::endl;
-            }
-
-            out.flags(original_flags);
-        }
-
-        return;
-    }
-
-    //------------------------------------------------------------------------------
-    static void section_datas(std::ostream& out, const elfio& reader) {
-        Elf_Half n = reader.sections.size();
-
-        if (n == 0) {
-            return;
-        }
-
-        out << "Section Data:" << std::endl;
-
-        for (Elf_Half i = 1; i < n; ++i) {  // For all sections
-            section* sec = reader.sections[i];
-            if (sec->get_type() == SHT_NOBITS) {
-                continue;
-            }
-            section_data(out, sec);
-        }
-
-        out << std::endl;
-    }
-
-    //------------------------------------------------------------------------------
-    static void segment_data(std::ostream& out, Elf_Half no, const segment* seg) {
-        std::ios_base::fmtflags original_flags = out.flags();
-
-        out << "Segment # " << no << std::endl;
-        const char* pdata = seg->get_data();
-        if (pdata) {
-            ELFIO::Elf_Xword i;
-            for (i = 0; i < std::min(seg->get_file_size(), MAX_DATA_ENTRIES); ++i) {
-                if (i % 16 == 0) {
-                    out << "[" << DUMP_HEX_FORMAT(8) << i << "]";
-                }
-
-                out << " " << DUMP_HEX_FORMAT(2) << (pdata[i] & 0x000000FF);
-
-                if (i % 16 == 15) {
-                    out << std::endl;
-                }
-            }
-            if (i % 16 != 0) {
-                out << std::endl;
-            }
-
-            out.flags(original_flags);
-        }
-
-        return;
-    }
-
-    //------------------------------------------------------------------------------
-    static void segment_datas(std::ostream& out, const elfio& reader) {
-        Elf_Half n = reader.segments.size();
-
-        if (n == 0) {
-            return;
-        }
-
-        out << "Segment Data:" << std::endl;
-
-        for (Elf_Half i = 0; i < n; ++i) {  // For all sections
-            segment* seg = reader.segments[i];
-            segment_data(out, i, seg);
-        }
-
-        out << std::endl;
-    }
-
-   private:
-    //------------------------------------------------------------------------------
-    template <typename T, typename K>
-    std::string static find_value_in_table(const T& table, const K& key) {
-        std::string res = "?";
-        for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); ++i) {
-            if (table[i].key == key) {
-                res = table[i].str;
-                break;
-            }
-        }
-
-        return res;
-    }
-
-
-    //------------------------------------------------------------------------------
-    template <typename T, typename K>
-    static std::string format_assoc(const T& table, const K& key) {
-        std::string str = find_value_in_table(table, key);
-        if (str == "?") {
-            std::ostringstream oss;
-            oss << str << " (0x" << std::hex << key << ")";
-            str = oss.str();
-        }
-
-        return str;
-    }
-
-
-    //------------------------------------------------------------------------------
-    template <typename T>
-    static std::string format_assoc(const T& table, const char key) {
-        return format_assoc(table, (const int)key);
-    }
-
-
-    //------------------------------------------------------------------------------
-    static std::string section_flags(Elf_Xword flags) {
-        std::string ret = "";
-        if (flags & SHF_WRITE) {
-            ret += "W";
-        }
-        if (flags & SHF_ALLOC) {
-            ret += "A";
-        }
-        if (flags & SHF_EXECINSTR) {
-            ret += "X";
-        }
-
-        return ret;
-    }
-
-
-//------------------------------------------------------------------------------
-#define STR_FUNC_TABLE(name)                                                                       \
-    template <typename T>                                                                          \
-    static std::string str_##name(const T key) {                                                   \
-        return format_assoc(name##_table, key);                                                    \
-    }
-
-    STR_FUNC_TABLE(class)
-    STR_FUNC_TABLE(endian)
-    STR_FUNC_TABLE(version)
-    STR_FUNC_TABLE(type)
-    STR_FUNC_TABLE(machine)
-    STR_FUNC_TABLE(section_type)
-    STR_FUNC_TABLE(segment_type)
-    STR_FUNC_TABLE(segment_flag)
-    STR_FUNC_TABLE(symbol_bind)
-    STR_FUNC_TABLE(symbol_type)
-    STR_FUNC_TABLE(dynamic_tag)
-
-#undef STR_FUNC_TABLE
-#undef DUMP_DEC_FORMAT
-#undef DUMP_HEX_FORMAT
-#undef DUMP_STR_FORMAT
-};  // class dump
-
-
-};  // namespace ELFIO
-
-#endif  // ELFIO_DUMP_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dynamic.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dynamic.hpp
deleted file mode 100644
index 53a6e28..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_dynamic.hpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_DYNAMIC_HPP
-#define ELFIO_DYNAMIC_HPP
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class dynamic_section_accessor {
-   public:
-    //------------------------------------------------------------------------------
-    dynamic_section_accessor(const elfio& elf_file_, section* section_)
-        : elf_file(elf_file_), dynamic_section(section_) {}
-
-    //------------------------------------------------------------------------------
-    Elf_Xword get_entries_num() const {
-        Elf_Xword nRet = 0;
-
-        if (0 != dynamic_section->get_entry_size()) {
-            nRet = dynamic_section->get_size() / dynamic_section->get_entry_size();
-        }
-
-        return nRet;
-    }
-
-    //------------------------------------------------------------------------------
-    bool get_entry(Elf_Xword index, Elf_Xword& tag, Elf_Xword& value, std::string& str) const {
-        if (index >= get_entries_num()) {  // Is index valid
-            return false;
-        }
-
-        if (elf_file.get_class() == ELFCLASS32) {
-            generic_get_entry_dyn<Elf32_Dyn>(index, tag, value);
-        } else {
-            generic_get_entry_dyn<Elf64_Dyn>(index, tag, value);
-        }
-
-        // If the tag may have a string table reference, prepare the string
-        if (tag == DT_NEEDED || tag == DT_SONAME || tag == DT_RPATH || tag == DT_RUNPATH) {
-            string_section_accessor strsec = elf_file.sections[get_string_table_index()];
-            const char* result = strsec.get_string(value);
-            if (0 == result) {
-                str.clear();
-                return false;
-            }
-            str = result;
-        } else {
-            str.clear();
-        }
-
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    void add_entry(Elf_Xword& tag, Elf_Xword& value) {
-        if (elf_file.get_class() == ELFCLASS32) {
-            generic_add_entry<Elf32_Dyn>(tag, value);
-        } else {
-            generic_add_entry<Elf64_Dyn>(tag, value);
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    void add_entry(Elf_Xword& tag, std::string& str) {
-        string_section_accessor strsec = elf_file.sections[get_string_table_index()];
-        Elf_Xword value = strsec.add_string(str);
-        add_entry(tag, value);
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    //------------------------------------------------------------------------------
-    Elf_Half get_string_table_index() const { return (Elf_Half)dynamic_section->get_link(); }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    void generic_get_entry_dyn(Elf_Xword index, Elf_Xword& tag, Elf_Xword& value) const {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        // Check unusual case when dynamic section has no data
-        if (dynamic_section->get_data() == 0 ||
-            (index + 1) * dynamic_section->get_entry_size() > dynamic_section->get_size()) {
-            tag = DT_NULL;
-            value = 0;
-            return;
-        }
-
-        const T* pEntry = reinterpret_cast<const T*>(dynamic_section->get_data() +
-                                                     index * dynamic_section->get_entry_size());
-        tag = convertor(pEntry->d_tag);
-        switch (tag) {
-            case DT_NULL:
-            case DT_SYMBOLIC:
-            case DT_TEXTREL:
-            case DT_BIND_NOW:
-                value = 0;
-                break;
-            case DT_NEEDED:
-            case DT_PLTRELSZ:
-            case DT_RELASZ:
-            case DT_RELAENT:
-            case DT_STRSZ:
-            case DT_SYMENT:
-            case DT_SONAME:
-            case DT_RPATH:
-            case DT_RELSZ:
-            case DT_RELENT:
-            case DT_PLTREL:
-            case DT_INIT_ARRAYSZ:
-            case DT_FINI_ARRAYSZ:
-            case DT_RUNPATH:
-            case DT_FLAGS:
-            case DT_PREINIT_ARRAYSZ:
-                value = convertor(pEntry->d_un.d_val);
-                break;
-            case DT_PLTGOT:
-            case DT_HASH:
-            case DT_STRTAB:
-            case DT_SYMTAB:
-            case DT_RELA:
-            case DT_INIT:
-            case DT_FINI:
-            case DT_REL:
-            case DT_DEBUG:
-            case DT_JMPREL:
-            case DT_INIT_ARRAY:
-            case DT_FINI_ARRAY:
-            case DT_PREINIT_ARRAY:
-            default:
-                value = convertor(pEntry->d_un.d_ptr);
-                break;
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    void generic_add_entry(Elf_Xword tag, Elf_Xword value) {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        T entry;
-
-        switch (tag) {
-            case DT_NULL:
-            case DT_SYMBOLIC:
-            case DT_TEXTREL:
-            case DT_BIND_NOW:
-                value = 0;
-            case DT_NEEDED:
-            case DT_PLTRELSZ:
-            case DT_RELASZ:
-            case DT_RELAENT:
-            case DT_STRSZ:
-            case DT_SYMENT:
-            case DT_SONAME:
-            case DT_RPATH:
-            case DT_RELSZ:
-            case DT_RELENT:
-            case DT_PLTREL:
-            case DT_INIT_ARRAYSZ:
-            case DT_FINI_ARRAYSZ:
-            case DT_RUNPATH:
-            case DT_FLAGS:
-            case DT_PREINIT_ARRAYSZ:
-                entry.d_un.d_val = convertor(value);
-                break;
-            case DT_PLTGOT:
-            case DT_HASH:
-            case DT_STRTAB:
-            case DT_SYMTAB:
-            case DT_RELA:
-            case DT_INIT:
-            case DT_FINI:
-            case DT_REL:
-            case DT_DEBUG:
-            case DT_JMPREL:
-            case DT_INIT_ARRAY:
-            case DT_FINI_ARRAY:
-            case DT_PREINIT_ARRAY:
-            default:
-                entry.d_un.d_ptr = convertor(value);
-                break;
-        }
-
-        entry.d_tag = convertor(tag);
-
-        dynamic_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    const elfio& elf_file;
-    section* dynamic_section;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_DYNAMIC_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_header.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_header.hpp
deleted file mode 100644
index b95f0a9..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_header.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELF_HEADER_HPP
-#define ELF_HEADER_HPP
-
-#include <iostream>
-
-namespace ELFIO {
-
-class elf_header {
-   public:
-    virtual ~elf_header(){};
-    virtual bool load(std::istream& stream) = 0;
-    virtual bool save(std::ostream& stream) const = 0;
-
-    // ELF header functions
-    ELFIO_GET_ACCESS_DECL(unsigned char, class);
-    ELFIO_GET_ACCESS_DECL(unsigned char, elf_version);
-    ELFIO_GET_ACCESS_DECL(unsigned char, encoding);
-    ELFIO_GET_ACCESS_DECL(Elf_Word, version);
-    ELFIO_GET_ACCESS_DECL(Elf_Half, header_size);
-    ELFIO_GET_ACCESS_DECL(Elf_Half, section_entry_size);
-    ELFIO_GET_ACCESS_DECL(Elf_Half, segment_entry_size);
-
-    ELFIO_GET_SET_ACCESS_DECL(unsigned char, os_abi);
-    ELFIO_GET_SET_ACCESS_DECL(unsigned char, abi_version);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Half, type);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Half, machine);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Word, flags);
-    ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, entry);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Half, sections_num);
-    ELFIO_GET_SET_ACCESS_DECL(Elf64_Off, sections_offset);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Half, segments_num);
-    ELFIO_GET_SET_ACCESS_DECL(Elf64_Off, segments_offset);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Half, section_name_str_index);
-};
-
-
-template <class T>
-struct elf_header_impl_types;
-template <>
-struct elf_header_impl_types<Elf32_Ehdr> {
-    typedef Elf32_Phdr Phdr_type;
-    typedef Elf32_Shdr Shdr_type;
-    static const unsigned char file_class = ELFCLASS32;
-};
-template <>
-struct elf_header_impl_types<Elf64_Ehdr> {
-    typedef Elf64_Phdr Phdr_type;
-    typedef Elf64_Shdr Shdr_type;
-    static const unsigned char file_class = ELFCLASS64;
-};
-
-template <class T>
-class elf_header_impl : public elf_header {
-   public:
-    elf_header_impl(endianess_convertor* convertor_, unsigned char encoding) {
-        convertor = convertor_;
-
-        std::fill_n(reinterpret_cast<char*>(&header), sizeof(header), '\0');
-
-        header.e_ident[EI_MAG0] = ELFMAG0;
-        header.e_ident[EI_MAG1] = ELFMAG1;
-        header.e_ident[EI_MAG2] = ELFMAG2;
-        header.e_ident[EI_MAG3] = ELFMAG3;
-        header.e_ident[EI_CLASS] = elf_header_impl_types<T>::file_class;
-        header.e_ident[EI_DATA] = encoding;
-        header.e_ident[EI_VERSION] = EV_CURRENT;
-        header.e_version = EV_CURRENT;
-        header.e_version = (*convertor)(header.e_version);
-        header.e_ehsize = (sizeof(header));
-        header.e_ehsize = (*convertor)(header.e_ehsize);
-        header.e_shstrndx = (*convertor)((Elf_Half)1);
-        header.e_phentsize = sizeof(typename elf_header_impl_types<T>::Phdr_type);
-        header.e_shentsize = sizeof(typename elf_header_impl_types<T>::Shdr_type);
-        header.e_phentsize = (*convertor)(header.e_phentsize);
-        header.e_shentsize = (*convertor)(header.e_shentsize);
-    }
-
-    bool load(std::istream& stream) {
-        stream.seekg(0);
-        stream.read(reinterpret_cast<char*>(&header), sizeof(header));
-
-        return (stream.gcount() == sizeof(header));
-    }
-
-    bool save(std::ostream& stream) const {
-        stream.seekp(0);
-        stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
-
-        return stream.good();
-    }
-
-    // ELF header functions
-    ELFIO_GET_ACCESS(unsigned char, class, header.e_ident[EI_CLASS]);
-    ELFIO_GET_ACCESS(unsigned char, elf_version, header.e_ident[EI_VERSION]);
-    ELFIO_GET_ACCESS(unsigned char, encoding, header.e_ident[EI_DATA]);
-    ELFIO_GET_ACCESS(Elf_Word, version, header.e_version);
-    ELFIO_GET_ACCESS(Elf_Half, header_size, header.e_ehsize);
-    ELFIO_GET_ACCESS(Elf_Half, section_entry_size, header.e_shentsize);
-    ELFIO_GET_ACCESS(Elf_Half, segment_entry_size, header.e_phentsize);
-
-    ELFIO_GET_SET_ACCESS(unsigned char, os_abi, header.e_ident[EI_OSABI]);
-    ELFIO_GET_SET_ACCESS(unsigned char, abi_version, header.e_ident[EI_ABIVERSION]);
-    ELFIO_GET_SET_ACCESS(Elf_Half, type, header.e_type);
-    ELFIO_GET_SET_ACCESS(Elf_Half, machine, header.e_machine);
-    ELFIO_GET_SET_ACCESS(Elf_Word, flags, header.e_flags);
-    ELFIO_GET_SET_ACCESS(Elf_Half, section_name_str_index, header.e_shstrndx);
-    ELFIO_GET_SET_ACCESS(Elf64_Addr, entry, header.e_entry);
-    ELFIO_GET_SET_ACCESS(Elf_Half, sections_num, header.e_shnum);
-    ELFIO_GET_SET_ACCESS(Elf64_Off, sections_offset, header.e_shoff);
-    ELFIO_GET_SET_ACCESS(Elf_Half, segments_num, header.e_phnum);
-    ELFIO_GET_SET_ACCESS(Elf64_Off, segments_offset, header.e_phoff);
-
-   private:
-    T header;
-    endianess_convertor* convertor;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELF_HEADER_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_note.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_note.hpp
deleted file mode 100644
index e350c85..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_note.hpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_NOTE_HPP
-#define ELFIO_NOTE_HPP
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-// There are discrepancies in documentations. SCO documentation
-// (http://www.sco.com/developers/gabi/latest/ch5.pheader.html#note_section)
-// requires 8 byte entries alignment for 64-bit ELF file,
-// but Oracle's definition uses the same structure
-// for 32-bit and 64-bit formats.
-// (https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-18048.html)
-//
-// It looks like EM_X86_64 Linux implementation is similar to Oracle's
-// definition. Therefore, the same alignment works for both formats
-//------------------------------------------------------------------------------
-
-//------------------------------------------------------------------------------
-class note_section_accessor {
-   public:
-    //------------------------------------------------------------------------------
-    note_section_accessor(const elfio& elf_file_, section* section_)
-        : elf_file(elf_file_), note_section(section_) {
-        process_section();
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Word get_notes_num() const { return (Elf_Word)note_start_positions.size(); }
-
-    //------------------------------------------------------------------------------
-    bool get_note(Elf_Word index, Elf_Word& type, std::string& name, void*& desc,
-                  Elf_Word& descSize) const {
-        if (index >= note_section->get_size()) {
-            return false;
-        }
-
-        const char* pData = note_section->get_data() + note_start_positions[index];
-        int align = sizeof(Elf_Word);
-
-        const endianess_convertor& convertor = elf_file.get_convertor();
-        type = convertor(*(Elf_Word*)(pData + 2 * align));
-        Elf_Word namesz = convertor(*(Elf_Word*)(pData));
-        descSize = convertor(*(Elf_Word*)(pData + sizeof(namesz)));
-        Elf_Word max_name_size = note_section->get_size() - note_start_positions[index];
-        if (namesz > max_name_size || namesz + descSize > max_name_size) {
-            return false;
-        }
-        name.assign(pData + 3 * align, namesz - 1);
-        if (0 == descSize) {
-            desc = 0;
-        } else {
-            desc = const_cast<char*>(pData + 3 * align + ((namesz + align - 1) / align) * align);
-        }
-
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    void add_note(Elf_Word type, const std::string& name, const void* desc, Elf_Word descSize) {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        int align = sizeof(Elf_Word);
-        Elf_Word nameLen = (Elf_Word)name.size() + 1;
-        Elf_Word nameLenConv = convertor(nameLen);
-        std::string buffer(reinterpret_cast<char*>(&nameLenConv), align);
-        Elf_Word descSizeConv = convertor(descSize);
-        buffer.append(reinterpret_cast<char*>(&descSizeConv), align);
-        type = convertor(type);
-        buffer.append(reinterpret_cast<char*>(&type), align);
-        buffer.append(name);
-        buffer.append(1, '\x00');
-        const char pad[] = {'\0', '\0', '\0', '\0'};
-        if (nameLen % align != 0) {
-            buffer.append(pad, align - nameLen % align);
-        }
-        if (desc != 0 && descSize != 0) {
-            buffer.append(reinterpret_cast<const char*>(desc), descSize);
-            if (descSize % align != 0) {
-                buffer.append(pad, align - descSize % align);
-            }
-        }
-
-        note_start_positions.push_back(note_section->get_size());
-        note_section->append_data(buffer);
-    }
-
-   private:
-    //------------------------------------------------------------------------------
-    void process_section() {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-        const char* data = note_section->get_data();
-        Elf_Xword size = note_section->get_size();
-        Elf_Xword current = 0;
-
-        note_start_positions.clear();
-
-        // Is it empty?
-        if (0 == data || 0 == size) {
-            return;
-        }
-
-        int align = sizeof(Elf_Word);
-        while (current + 3 * align <= size) {
-            note_start_positions.push_back(current);
-            Elf_Word namesz = convertor(*(Elf_Word*)(data + current));
-            Elf_Word descsz = convertor(*(Elf_Word*)(data + current + sizeof(namesz)));
-
-            current += 3 * sizeof(Elf_Word) + ((namesz + align - 1) / align) * align +
-                       ((descsz + align - 1) / align) * align;
-        }
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    const elfio& elf_file;
-    section* note_section;
-    std::vector<Elf_Xword> note_start_positions;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_NOTE_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_relocation.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_relocation.hpp
deleted file mode 100644
index 270c911..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_relocation.hpp
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_RELOCATION_HPP
-#define ELFIO_RELOCATION_HPP
-
-namespace ELFIO {
-
-template <typename T>
-struct get_sym_and_type;
-template <>
-struct get_sym_and_type<Elf32_Rel> {
-    static int get_r_sym(Elf_Xword info) { return ELF32_R_SYM((Elf_Word)info); }
-    static int get_r_type(Elf_Xword info) { return ELF32_R_TYPE((Elf_Word)info); }
-};
-template <>
-struct get_sym_and_type<Elf32_Rela> {
-    static int get_r_sym(Elf_Xword info) { return ELF32_R_SYM((Elf_Word)info); }
-    static int get_r_type(Elf_Xword info) { return ELF32_R_TYPE((Elf_Word)info); }
-};
-template <>
-struct get_sym_and_type<Elf64_Rel> {
-    static int get_r_sym(Elf_Xword info) { return ELF64_R_SYM(info); }
-    static int get_r_type(Elf_Xword info) { return ELF64_R_TYPE(info); }
-};
-template <>
-struct get_sym_and_type<Elf64_Rela> {
-    static int get_r_sym(Elf_Xword info) { return ELF64_R_SYM(info); }
-    static int get_r_type(Elf_Xword info) { return ELF64_R_TYPE(info); }
-};
-
-
-//------------------------------------------------------------------------------
-class relocation_section_accessor {
-   public:
-    //------------------------------------------------------------------------------
-    relocation_section_accessor(const elfio& elf_file_, section* section_)
-        : elf_file(elf_file_), relocation_section(section_) {}
-
-    //------------------------------------------------------------------------------
-    Elf_Xword get_entries_num() const {
-        Elf_Xword nRet = 0;
-
-        if (0 != relocation_section->get_entry_size()) {
-            nRet = relocation_section->get_size() / relocation_section->get_entry_size();
-        }
-
-        return nRet;
-    }
-
-    //------------------------------------------------------------------------------
-    bool get_entry(Elf_Xword index, Elf64_Addr& offset, Elf_Word& symbol, Elf_Word& type,
-                   Elf_Sxword& addend) const {
-        if (index >= get_entries_num()) {  // Is index valid
-            return false;
-        }
-
-        if (elf_file.get_class() == ELFCLASS32) {
-            if (SHT_REL == relocation_section->get_type()) {
-                generic_get_entry_rel<Elf32_Rel>(index, offset, symbol, type, addend);
-            } else if (SHT_RELA == relocation_section->get_type()) {
-                generic_get_entry_rela<Elf32_Rela>(index, offset, symbol, type, addend);
-            }
-        } else {
-            if (SHT_REL == relocation_section->get_type()) {
-                generic_get_entry_rel<Elf64_Rel>(index, offset, symbol, type, addend);
-            } else if (SHT_RELA == relocation_section->get_type()) {
-                generic_get_entry_rela<Elf64_Rela>(index, offset, symbol, type, addend);
-            }
-        }
-
-        return true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool get_entry(Elf_Xword index, Elf64_Addr& offset, Elf64_Addr& symbolValue,
-                   std::string& symbolName, Elf_Word& type, Elf_Sxword& addend,
-                   Elf_Sxword& calcValue) const {
-        // Do regular job
-        Elf_Word symbol;
-        bool ret = get_entry(index, offset, symbol, type, addend);
-
-        // Find the symbol
-        Elf_Xword size;
-        unsigned char bind;
-        unsigned char symbolType;
-        Elf_Half section;
-        unsigned char other;
-
-        symbol_section_accessor symbols(elf_file, elf_file.sections[get_symbol_table_index()]);
-        ret = ret && symbols.get_symbol(symbol, symbolName, symbolValue, size, bind, symbolType,
-                                        section, other);
-
-        if (ret) {  // Was it successful?
-            switch (type) {
-                case R_386_NONE:  // none
-                    calcValue = 0;
-                    break;
-                case R_386_32:  // S + A
-                    calcValue = symbolValue + addend;
-                    break;
-                case R_386_PC32:  // S + A - P
-                    calcValue = symbolValue + addend - offset;
-                    break;
-                case R_386_GOT32:  // G + A - P
-                    calcValue = 0;
-                    break;
-                case R_386_PLT32:  // L + A - P
-                    calcValue = 0;
-                    break;
-                case R_386_COPY:  // none
-                    calcValue = 0;
-                    break;
-                case R_386_GLOB_DAT:  // S
-                case R_386_JMP_SLOT:  // S
-                    calcValue = symbolValue;
-                    break;
-                case R_386_RELATIVE:  // B + A
-                    calcValue = addend;
-                    break;
-                case R_386_GOTOFF:  // S + A - GOT
-                    calcValue = 0;
-                    break;
-                case R_386_GOTPC:  // GOT + A - P
-                    calcValue = 0;
-                    break;
-                default:  // Not recognized symbol!
-                    calcValue = 0;
-                    break;
-            }
-        }
-
-        return ret;
-    }
-
-    //------------------------------------------------------------------------------
-    void add_entry(Elf64_Addr offset, Elf_Xword info) {
-        if (elf_file.get_class() == ELFCLASS32) {
-            generic_add_entry<Elf32_Rel>(offset, info);
-        } else {
-            generic_add_entry<Elf64_Rel>(offset, info);
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    void add_entry(Elf64_Addr offset, Elf_Word symbol, unsigned char type) {
-        Elf_Xword info;
-        if (elf_file.get_class() == ELFCLASS32) {
-            info = ELF32_R_INFO((Elf_Xword)symbol, type);
-        } else {
-            info = ELF64_R_INFO((Elf_Xword)symbol, type);
-        }
-
-        add_entry(offset, info);
-    }
-
-    //------------------------------------------------------------------------------
-    void add_entry(Elf64_Addr offset, Elf_Xword info, Elf_Sxword addend) {
-        if (elf_file.get_class() == ELFCLASS32) {
-            generic_add_entry<Elf32_Rela>(offset, info, addend);
-        } else {
-            generic_add_entry<Elf64_Rela>(offset, info, addend);
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    void add_entry(Elf64_Addr offset, Elf_Word symbol, unsigned char type, Elf_Sxword addend) {
-        Elf_Xword info;
-        if (elf_file.get_class() == ELFCLASS32) {
-            info = ELF32_R_INFO((Elf_Xword)symbol, type);
-        } else {
-            info = ELF64_R_INFO((Elf_Xword)symbol, type);
-        }
-
-        add_entry(offset, info, addend);
-    }
-
-    //------------------------------------------------------------------------------
-    void add_entry(string_section_accessor str_writer, const char* str,
-                   symbol_section_accessor sym_writer, Elf64_Addr value, Elf_Word size,
-                   unsigned char sym_info, unsigned char other, Elf_Half shndx, Elf64_Addr offset,
-                   unsigned char type) {
-        Elf_Word str_index = str_writer.add_string(str);
-        Elf_Word sym_index = sym_writer.add_symbol(str_index, value, size, sym_info, other, shndx);
-        add_entry(offset, sym_index, type);
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    //------------------------------------------------------------------------------
-    Elf_Half get_symbol_table_index() const { return (Elf_Half)relocation_section->get_link(); }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    void generic_get_entry_rel(Elf_Xword index, Elf64_Addr& offset, Elf_Word& symbol,
-                               Elf_Word& type, Elf_Sxword& addend) const {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        const T* pEntry = reinterpret_cast<const T*>(relocation_section->get_data() +
-                                                     index * relocation_section->get_entry_size());
-        offset = convertor(pEntry->r_offset);
-        Elf_Xword tmp = convertor(pEntry->r_info);
-        symbol = get_sym_and_type<T>::get_r_sym(tmp);
-        type = get_sym_and_type<T>::get_r_type(tmp);
-        addend = 0;
-    }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    void generic_get_entry_rela(Elf_Xword index, Elf64_Addr& offset, Elf_Word& symbol,
-                                Elf_Word& type, Elf_Sxword& addend) const {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        const T* pEntry = reinterpret_cast<const T*>(relocation_section->get_data() +
-                                                     index * relocation_section->get_entry_size());
-        offset = convertor(pEntry->r_offset);
-        Elf_Xword tmp = convertor(pEntry->r_info);
-        symbol = get_sym_and_type<T>::get_r_sym(tmp);
-        type = get_sym_and_type<T>::get_r_type(tmp);
-        addend = convertor(pEntry->r_addend);
-    }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    void generic_add_entry(Elf64_Addr offset, Elf_Xword info) {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        T entry;
-        entry.r_offset = offset;
-        entry.r_info = info;
-        entry.r_offset = convertor(entry.r_offset);
-        entry.r_info = convertor(entry.r_info);
-
-        relocation_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
-    }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    void generic_add_entry(Elf64_Addr offset, Elf_Xword info, Elf_Sxword addend) {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        T entry;
-        entry.r_offset = offset;
-        entry.r_info = info;
-        entry.r_addend = addend;
-        entry.r_offset = convertor(entry.r_offset);
-        entry.r_info = convertor(entry.r_info);
-        entry.r_addend = convertor(entry.r_addend);
-
-        relocation_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    const elfio& elf_file;
-    section* relocation_section;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_RELOCATION_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_section.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_section.hpp
deleted file mode 100644
index 6106fc7..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_section.hpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_SECTION_HPP
-#define ELFIO_SECTION_HPP
-
-#include <string>
-#include <iostream>
-
-namespace ELFIO {
-
-class section {
-    friend class elfio;
-
-   public:
-    virtual ~section(){};
-
-    ELFIO_GET_ACCESS_DECL(Elf_Half, index);
-    ELFIO_GET_SET_ACCESS_DECL(std::string, name);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Word, type);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, flags);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Word, info);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Word, link);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, addr_align);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, entry_size);
-    ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, address);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, size);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Word, name_string_offset);
-
-    virtual const char* get_data() const = 0;
-    virtual void set_data(const char* pData, Elf_Word size) = 0;
-    virtual void set_data(const std::string& data) = 0;
-    virtual void append_data(const char* pData, Elf_Word size) = 0;
-    virtual void append_data(const std::string& data) = 0;
-
-   protected:
-    ELFIO_GET_SET_ACCESS_DECL(Elf64_Off, offset);
-    ELFIO_SET_ACCESS_DECL(Elf_Half, index);
-
-    virtual void load(std::istream& f, std::streampos header_offset) = 0;
-    virtual void save(std::ostream& f, std::streampos header_offset,
-                      std::streampos data_offset) = 0;
-    virtual bool is_address_initialized() const = 0;
-};
-
-
-template <class T>
-class section_impl : public section {
-   public:
-    //------------------------------------------------------------------------------
-    section_impl(const endianess_convertor* convertor_) : convertor(convertor_) {
-        std::fill_n(reinterpret_cast<char*>(&header), sizeof(header), '\0');
-        is_address_set = false;
-        data = 0;
-        data_size = 0;
-    }
-
-    //------------------------------------------------------------------------------
-    ~section_impl() { delete[] data; }
-
-    //------------------------------------------------------------------------------
-    // Section info functions
-    ELFIO_GET_SET_ACCESS(Elf_Word, type, header.sh_type);
-    ELFIO_GET_SET_ACCESS(Elf_Xword, flags, header.sh_flags);
-    ELFIO_GET_SET_ACCESS(Elf_Xword, size, header.sh_size);
-    ELFIO_GET_SET_ACCESS(Elf_Word, link, header.sh_link);
-    ELFIO_GET_SET_ACCESS(Elf_Word, info, header.sh_info);
-    ELFIO_GET_SET_ACCESS(Elf_Xword, addr_align, header.sh_addralign);
-    ELFIO_GET_SET_ACCESS(Elf_Xword, entry_size, header.sh_entsize);
-    ELFIO_GET_SET_ACCESS(Elf_Word, name_string_offset, header.sh_name);
-    ELFIO_GET_ACCESS(Elf64_Addr, address, header.sh_addr);
-
-    //------------------------------------------------------------------------------
-    Elf_Half get_index() const { return index; }
-
-
-    //------------------------------------------------------------------------------
-    std::string get_name() const { return name; }
-
-    //------------------------------------------------------------------------------
-    void set_name(std::string name_) { name = name_; }
-
-    //------------------------------------------------------------------------------
-    void set_address(Elf64_Addr value) {
-        header.sh_addr = value;
-        header.sh_addr = (*convertor)(header.sh_addr);
-        is_address_set = true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool is_address_initialized() const { return is_address_set; }
-
-    //------------------------------------------------------------------------------
-    const char* get_data() const { return data; }
-
-    //------------------------------------------------------------------------------
-    void set_data(const char* raw_data, Elf_Word size) {
-        if (get_type() != SHT_NOBITS) {
-            delete[] data;
-            try {
-                data = new char[size];
-            } catch (const std::bad_alloc&) {
-                data = 0;
-                data_size = 0;
-                size = 0;
-            }
-            if (0 != data && 0 != raw_data) {
-                data_size = size;
-                std::copy(raw_data, raw_data + size, data);
-            }
-        }
-
-        set_size(size);
-    }
-
-    //------------------------------------------------------------------------------
-    void set_data(const std::string& str_data) {
-        return set_data(str_data.c_str(), (Elf_Word)str_data.size());
-    }
-
-    //------------------------------------------------------------------------------
-    void append_data(const char* raw_data, Elf_Word size) {
-        if (get_type() != SHT_NOBITS) {
-            if (get_size() + size < data_size) {
-                std::copy(raw_data, raw_data + size, data + get_size());
-            } else {
-                data_size = 2 * (data_size + size);
-                char* new_data;
-                try {
-                    new_data = new char[data_size];
-                } catch (const std::bad_alloc&) {
-                    new_data = 0;
-                    size = 0;
-                }
-                if (0 != new_data) {
-                    std::copy(data, data + get_size(), new_data);
-                    std::copy(raw_data, raw_data + size, new_data + get_size());
-                    delete[] data;
-                    data = new_data;
-                }
-            }
-            set_size(get_size() + size);
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    void append_data(const std::string& str_data) {
-        return append_data(str_data.c_str(), (Elf_Word)str_data.size());
-    }
-
-    //------------------------------------------------------------------------------
-   protected:
-    //------------------------------------------------------------------------------
-    ELFIO_GET_SET_ACCESS(Elf64_Off, offset, header.sh_offset);
-
-    //------------------------------------------------------------------------------
-    void set_index(Elf_Half value) { index = value; }
-
-    //------------------------------------------------------------------------------
-    void load(std::istream& stream, std::streampos header_offset) {
-        std::fill_n(reinterpret_cast<char*>(&header), sizeof(header), '\0');
-        stream.seekg(header_offset);
-        stream.read(reinterpret_cast<char*>(&header), sizeof(header));
-
-        Elf_Xword size = get_size();
-        if (0 == data && SHT_NULL != get_type() && SHT_NOBITS != get_type()) {
-            try {
-                data = new char[size];
-            } catch (const std::bad_alloc&) {
-                data = 0;
-                data_size = 0;
-            }
-            if (0 != size) {
-                stream.seekg((*convertor)(header.sh_offset));
-                stream.read(data, size);
-                data_size = size;
-            }
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    void save(std::ostream& f, std::streampos header_offset, std::streampos data_offset) {
-        if (0 != get_index()) {
-            header.sh_offset = data_offset;
-            header.sh_offset = (*convertor)(header.sh_offset);
-        }
-
-        save_header(f, header_offset);
-        if (get_type() != SHT_NOBITS && get_type() != SHT_NULL && get_size() != 0 && data != 0) {
-            save_data(f, data_offset);
-        }
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    //------------------------------------------------------------------------------
-    void save_header(std::ostream& f, std::streampos header_offset) const {
-        f.seekp(header_offset);
-        f.write(reinterpret_cast<const char*>(&header), sizeof(header));
-    }
-
-    //------------------------------------------------------------------------------
-    void save_data(std::ostream& f, std::streampos data_offset) const {
-        f.seekp(data_offset);
-        f.write(get_data(), get_size());
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    T header;
-    Elf_Half index;
-    std::string name;
-    char* data;
-    Elf_Word data_size;
-    const endianess_convertor* convertor;
-    bool is_address_set;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_SECTION_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_segment.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_segment.hpp
deleted file mode 100644
index 59e37ec..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_segment.hpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_SEGMENT_HPP
-#define ELFIO_SEGMENT_HPP
-
-#include <iostream>
-#include <vector>
-
-namespace ELFIO {
-
-class segment {
-    friend class elfio;
-
-   public:
-    virtual ~segment(){};
-
-    ELFIO_GET_ACCESS_DECL(Elf_Half, index);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Word, type);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Word, flags);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, align);
-    ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, virtual_address);
-    ELFIO_GET_SET_ACCESS_DECL(Elf64_Addr, physical_address);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, file_size);
-    ELFIO_GET_SET_ACCESS_DECL(Elf_Xword, memory_size);
-    ELFIO_GET_ACCESS_DECL(Elf64_Off, offset);
-
-    virtual const char* get_data() const = 0;
-
-    virtual Elf_Half add_section_index(Elf_Half index, Elf_Xword addr_align) = 0;
-    virtual Elf_Half get_sections_num() const = 0;
-    virtual Elf_Half get_section_index_at(Elf_Half num) const = 0;
-    virtual bool is_offset_initialized() const = 0;
-
-   protected:
-    ELFIO_SET_ACCESS_DECL(Elf64_Off, offset);
-    ELFIO_SET_ACCESS_DECL(Elf_Half, index);
-
-    virtual const std::vector<Elf_Half>& get_sections() const = 0;
-    virtual void load(std::istream& stream, std::streampos header_offset) = 0;
-    virtual void save(std::ostream& f, std::streampos header_offset,
-                      std::streampos data_offset) = 0;
-};
-
-
-//------------------------------------------------------------------------------
-template <class T>
-class segment_impl : public segment {
-   public:
-    //------------------------------------------------------------------------------
-    segment_impl(endianess_convertor* convertor_) : convertor(convertor_) {
-        is_offset_set = false;
-        std::fill_n(reinterpret_cast<char*>(&ph), sizeof(ph), '\0');
-        data = 0;
-    }
-
-    //------------------------------------------------------------------------------
-    virtual ~segment_impl() { delete[] data; }
-
-    //------------------------------------------------------------------------------
-    // Section info functions
-    ELFIO_GET_SET_ACCESS(Elf_Word, type, ph.p_type);
-    ELFIO_GET_SET_ACCESS(Elf_Word, flags, ph.p_flags);
-    ELFIO_GET_SET_ACCESS(Elf_Xword, align, ph.p_align);
-    ELFIO_GET_SET_ACCESS(Elf64_Addr, virtual_address, ph.p_vaddr);
-    ELFIO_GET_SET_ACCESS(Elf64_Addr, physical_address, ph.p_paddr);
-    ELFIO_GET_SET_ACCESS(Elf_Xword, file_size, ph.p_filesz);
-    ELFIO_GET_SET_ACCESS(Elf_Xword, memory_size, ph.p_memsz);
-    ELFIO_GET_ACCESS(Elf64_Off, offset, ph.p_offset);
-
-    //------------------------------------------------------------------------------
-    Elf_Half get_index() const { return index; }
-
-    //------------------------------------------------------------------------------
-    const char* get_data() const { return data; }
-
-    //------------------------------------------------------------------------------
-    Elf_Half add_section_index(Elf_Half sec_index, Elf_Xword addr_align) {
-        sections.push_back(sec_index);
-        if (addr_align > get_align()) {
-            set_align(addr_align);
-        }
-
-        return (Elf_Half)sections.size();
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Half get_sections_num() const { return (Elf_Half)sections.size(); }
-
-    //------------------------------------------------------------------------------
-    Elf_Half get_section_index_at(Elf_Half num) const {
-        if (num < sections.size()) {
-            return sections[num];
-        }
-
-        return -1;
-    }
-
-    //------------------------------------------------------------------------------
-   protected:
-    //------------------------------------------------------------------------------
-
-    //------------------------------------------------------------------------------
-    void set_offset(Elf64_Off value) {
-        ph.p_offset = value;
-        ph.p_offset = (*convertor)(ph.p_offset);
-        is_offset_set = true;
-    }
-
-    //------------------------------------------------------------------------------
-    bool is_offset_initialized() const { return is_offset_set; }
-
-    //------------------------------------------------------------------------------
-    const std::vector<Elf_Half>& get_sections() const { return sections; }
-
-    //------------------------------------------------------------------------------
-    void set_index(Elf_Half value) { index = value; }
-
-    //------------------------------------------------------------------------------
-    void load(std::istream& stream, std::streampos header_offset) {
-        stream.seekg(header_offset);
-        stream.read(reinterpret_cast<char*>(&ph), sizeof(ph));
-        is_offset_set = true;
-
-        if (PT_NULL != get_type() && 0 != get_file_size()) {
-            stream.seekg((*convertor)(ph.p_offset));
-            Elf_Xword size = get_file_size();
-            try {
-                data = new char[size];
-            } catch (const std::bad_alloc&) {
-                data = 0;
-            }
-            if (0 != data) {
-                stream.read(data, size);
-            }
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    void save(std::ostream& f, std::streampos header_offset, std::streampos data_offset) {
-        ph.p_offset = data_offset;
-        ph.p_offset = (*convertor)(ph.p_offset);
-        f.seekp(header_offset);
-        f.write(reinterpret_cast<const char*>(&ph), sizeof(ph));
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    T ph;
-    Elf_Half index;
-    char* data;
-    std::vector<Elf_Half> sections;
-    endianess_convertor* convertor;
-    bool is_offset_set;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_SEGMENT_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_strings.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_strings.hpp
deleted file mode 100644
index 07adc3a..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_strings.hpp
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_STRINGS_HPP
-#define ELFIO_STRINGS_HPP
-
-#include <cstdlib>
-#include <cstring>
-#include <string>
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class string_section_accessor {
-   public:
-    //------------------------------------------------------------------------------
-    string_section_accessor(section* section_) : string_section(section_) {}
-
-
-    //------------------------------------------------------------------------------
-    const char* get_string(Elf_Word index) const {
-        if (string_section) {
-            if (index < string_section->get_size()) {
-                const char* data = string_section->get_data();
-                if (0 != data) {
-                    return data + index;
-                }
-            }
-        }
-
-        return 0;
-    }
-
-
-    //------------------------------------------------------------------------------
-    Elf_Word add_string(const char* str) {
-        Elf_Word current_position = 0;
-
-        if (string_section) {
-            // Strings are addeded to the end of the current section data
-            current_position = (Elf_Word)string_section->get_size();
-
-            if (current_position == 0) {
-                char empty_string = '\0';
-                string_section->append_data(&empty_string, 1);
-                current_position++;
-            }
-            string_section->append_data(str, (Elf_Word)std::strlen(str) + 1);
-        }
-
-        return current_position;
-    }
-
-
-    //------------------------------------------------------------------------------
-    Elf_Word add_string(const std::string& str) { return add_string(str.c_str()); }
-
-    //------------------------------------------------------------------------------
-   private:
-    section* string_section;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_STRINGS_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_symbols.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_symbols.hpp
deleted file mode 100644
index 8184bcd..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_symbols.hpp
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_SYMBOLS_HPP
-#define ELFIO_SYMBOLS_HPP
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class symbol_section_accessor {
-   public:
-    //------------------------------------------------------------------------------
-    symbol_section_accessor(const elfio& elf_file_, section* symbol_section_)
-        : elf_file(elf_file_), symbol_section(symbol_section_) {
-        find_hash_section();
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Xword get_symbols_num() const {
-        Elf_Xword nRet = 0;
-        if (0 != symbol_section->get_entry_size()) {
-            nRet = symbol_section->get_size() / symbol_section->get_entry_size();
-        }
-
-        return nRet;
-    }
-
-    //------------------------------------------------------------------------------
-    bool get_symbol(Elf_Xword index, std::string& name, Elf64_Addr& value, Elf_Xword& size,
-                    unsigned char& bind, unsigned char& type, Elf_Half& section_index,
-                    unsigned char& other) const {
-        bool ret = false;
-
-        if (elf_file.get_class() == ELFCLASS32) {
-            ret = generic_get_symbol<Elf32_Sym>(index, name, value, size, bind, type, section_index,
-                                                other);
-        } else {
-            ret = generic_get_symbol<Elf64_Sym>(index, name, value, size, bind, type, section_index,
-                                                other);
-        }
-
-        return ret;
-    }
-
-    //------------------------------------------------------------------------------
-    bool get_symbol(const std::string& name, Elf64_Addr& value, Elf_Xword& size,
-                    unsigned char& bind, unsigned char& type, Elf_Half& section_index,
-                    unsigned char& other) const {
-        bool ret = false;
-
-        if (0 != get_hash_table_index()) {
-            Elf_Word nbucket = *(Elf_Word*)hash_section->get_data();
-            Elf_Word nchain = *(Elf_Word*)(hash_section->get_data() + sizeof(Elf_Word));
-            Elf_Word val = elf_hash((const unsigned char*)name.c_str());
-
-            Elf_Word y =
-                *(Elf_Word*)(hash_section->get_data() + (2 + val % nbucket) * sizeof(Elf_Word));
-            std::string str;
-            get_symbol(y, str, value, size, bind, type, section_index, other);
-            while (str != name && STN_UNDEF != y && y < nchain) {
-                y = *(Elf_Word*)(hash_section->get_data() + (2 + nbucket + y) * sizeof(Elf_Word));
-                get_symbol(y, str, value, size, bind, type, section_index, other);
-            }
-            if (str == name) {
-                ret = true;
-            }
-        }
-
-        return ret;
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Word add_symbol(Elf_Word name, Elf64_Addr value, Elf_Xword size, unsigned char info,
-                        unsigned char other, Elf_Half shndx) {
-        Elf_Word nRet;
-
-        if (symbol_section->get_size() == 0) {
-            if (elf_file.get_class() == ELFCLASS32) {
-                nRet = generic_add_symbol<Elf32_Sym>(0, 0, 0, 0, 0, 0);
-            } else {
-                nRet = generic_add_symbol<Elf64_Sym>(0, 0, 0, 0, 0, 0);
-            }
-        }
-
-        if (elf_file.get_class() == ELFCLASS32) {
-            nRet = generic_add_symbol<Elf32_Sym>(name, value, size, info, other, shndx);
-        } else {
-            nRet = generic_add_symbol<Elf64_Sym>(name, value, size, info, other, shndx);
-        }
-
-        return nRet;
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Word add_symbol(Elf_Word name, Elf64_Addr value, Elf_Xword size, unsigned char bind,
-                        unsigned char type, unsigned char other, Elf_Half shndx) {
-        return add_symbol(name, value, size, ELF_ST_INFO(bind, type), other, shndx);
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Word add_symbol(string_section_accessor& pStrWriter, const char* str, Elf64_Addr value,
-                        Elf_Xword size, unsigned char info, unsigned char other, Elf_Half shndx) {
-        Elf_Word index = pStrWriter.add_string(str);
-        return add_symbol(index, value, size, info, other, shndx);
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Word add_symbol(string_section_accessor& pStrWriter, const char* str, Elf64_Addr value,
-                        Elf_Xword size, unsigned char bind, unsigned char type, unsigned char other,
-                        Elf_Half shndx) {
-        return add_symbol(pStrWriter, str, value, size, ELF_ST_INFO(bind, type), other, shndx);
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    //------------------------------------------------------------------------------
-    void find_hash_section() {
-        hash_section = 0;
-        hash_section_index = 0;
-        Elf_Half nSecNo = elf_file.sections.size();
-        for (Elf_Half i = 0; i < nSecNo && 0 == hash_section_index; ++i) {
-            const section* sec = elf_file.sections[i];
-            if (sec->get_link() == symbol_section->get_index()) {
-                hash_section = sec;
-                hash_section_index = i;
-            }
-        }
-    }
-
-    //------------------------------------------------------------------------------
-    Elf_Half get_string_table_index() const { return (Elf_Half)symbol_section->get_link(); }
-
-    //------------------------------------------------------------------------------
-    Elf_Half get_hash_table_index() const { return hash_section_index; }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    bool generic_get_symbol(Elf_Xword index, std::string& name, Elf64_Addr& value, Elf_Xword& size,
-                            unsigned char& bind, unsigned char& type, Elf_Half& section_index,
-                            unsigned char& other) const {
-        bool ret = false;
-
-        if (index < get_symbols_num()) {
-            const T* pSym = reinterpret_cast<const T*>(symbol_section->get_data() +
-                                                       index * symbol_section->get_entry_size());
-
-            const endianess_convertor& convertor = elf_file.get_convertor();
-
-            section* string_section = elf_file.sections[get_string_table_index()];
-            string_section_accessor str_reader(string_section);
-            const char* pStr = str_reader.get_string(convertor(pSym->st_name));
-            if (0 != pStr) {
-                name = pStr;
-            }
-            value = convertor(pSym->st_value);
-            size = convertor(pSym->st_size);
-            bind = ELF_ST_BIND(pSym->st_info);
-            type = ELF_ST_TYPE(pSym->st_info);
-            section_index = convertor(pSym->st_shndx);
-            other = pSym->st_other;
-
-            ret = true;
-        }
-
-        return ret;
-    }
-
-    //------------------------------------------------------------------------------
-    template <class T>
-    Elf_Word generic_add_symbol(Elf_Word name, Elf64_Addr value, Elf_Xword size, unsigned char info,
-                                unsigned char other, Elf_Half shndx) {
-        const endianess_convertor& convertor = elf_file.get_convertor();
-
-        T entry;
-        entry.st_name = convertor(name);
-        entry.st_value = value;
-        entry.st_value = convertor(entry.st_value);
-        entry.st_size = size;
-        entry.st_size = convertor(entry.st_size);
-        entry.st_info = convertor(info);
-        entry.st_other = convertor(other);
-        entry.st_shndx = convertor(shndx);
-
-        symbol_section->append_data(reinterpret_cast<char*>(&entry), sizeof(entry));
-
-        Elf_Word nRet = symbol_section->get_size() / sizeof(entry) - 1;
-
-        return nRet;
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    const elfio& elf_file;
-    section* symbol_section;
-    Elf_Half hash_section_index;
-    const section* hash_section;
-};
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_SYMBOLS_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_utils.hpp b/third_party/rocm/include/hip/hcc_detail/elfio/elfio_utils.hpp
deleted file mode 100644
index b1bb00e..0000000
--- a/third_party/rocm/include/hip/hcc_detail/elfio/elfio_utils.hpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
-Copyright (C) 2001-2015 by Serge Lamikhov-Center
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef ELFIO_UTILS_HPP
-#define ELFIO_UTILS_HPP
-
-#define ELFIO_GET_ACCESS(TYPE, NAME, FIELD)                                                        \
-    TYPE get_##NAME() const { return (*convertor)(FIELD); }
-#define ELFIO_SET_ACCESS(TYPE, NAME, FIELD)                                                        \
-    void set_##NAME(TYPE value) {                                                                  \
-        FIELD = value;                                                                             \
-        FIELD = (*convertor)(FIELD);                                                               \
-    }
-#define ELFIO_GET_SET_ACCESS(TYPE, NAME, FIELD)                                                    \
-    TYPE get_##NAME() const { return (*convertor)(FIELD); }                                        \
-    void set_##NAME(TYPE value) {                                                                  \
-        FIELD = value;                                                                             \
-        FIELD = (*convertor)(FIELD);                                                               \
-    }
-
-#define ELFIO_GET_ACCESS_DECL(TYPE, NAME) virtual TYPE get_##NAME() const = 0
-
-#define ELFIO_SET_ACCESS_DECL(TYPE, NAME) virtual void set_##NAME(TYPE value) = 0
-
-#define ELFIO_GET_SET_ACCESS_DECL(TYPE, NAME)                                                      \
-    virtual TYPE get_##NAME() const = 0;                                                           \
-    virtual void set_##NAME(TYPE value) = 0
-
-namespace ELFIO {
-
-//------------------------------------------------------------------------------
-class endianess_convertor {
-   public:
-    //------------------------------------------------------------------------------
-    endianess_convertor() { need_conversion = false; }
-
-    //------------------------------------------------------------------------------
-    void setup(unsigned char elf_file_encoding) {
-        need_conversion = (elf_file_encoding != get_host_encoding());
-    }
-
-    //------------------------------------------------------------------------------
-    uint64_t operator()(uint64_t value) const {
-        if (!need_conversion) {
-            return value;
-        }
-        value = ((value & 0x00000000000000FFull) << 56) | ((value & 0x000000000000FF00ull) << 40) |
-                ((value & 0x0000000000FF0000ull) << 24) | ((value & 0x00000000FF000000ull) << 8) |
-                ((value & 0x000000FF00000000ull) >> 8) | ((value & 0x0000FF0000000000ull) >> 24) |
-                ((value & 0x00FF000000000000ull) >> 40) | ((value & 0xFF00000000000000ull) >> 56);
-
-        return value;
-    }
-
-    //------------------------------------------------------------------------------
-    int64_t operator()(int64_t value) const {
-        if (!need_conversion) {
-            return value;
-        }
-        return (int64_t)(*this)((uint64_t)value);
-    }
-
-    //------------------------------------------------------------------------------
-    uint32_t operator()(uint32_t value) const {
-        if (!need_conversion) {
-            return value;
-        }
-        value = ((value & 0x000000FF) << 24) | ((value & 0x0000FF00) << 8) |
-                ((value & 0x00FF0000) >> 8) | ((value & 0xFF000000) >> 24);
-
-        return value;
-    }
-
-    //------------------------------------------------------------------------------
-    int32_t operator()(int32_t value) const {
-        if (!need_conversion) {
-            return value;
-        }
-        return (int32_t)(*this)((uint32_t)value);
-    }
-
-    //------------------------------------------------------------------------------
-    uint16_t operator()(uint16_t value) const {
-        if (!need_conversion) {
-            return value;
-        }
-        value = ((value & 0x00FF) << 8) | ((value & 0xFF00) >> 8);
-
-        return value;
-    }
-
-    //------------------------------------------------------------------------------
-    int16_t operator()(int16_t value) const {
-        if (!need_conversion) {
-            return value;
-        }
-        return (int16_t)(*this)((uint16_t)value);
-    }
-
-    //------------------------------------------------------------------------------
-    int8_t operator()(int8_t value) const { return value; }
-
-    //------------------------------------------------------------------------------
-    uint8_t operator()(uint8_t value) const { return value; }
-
-    //------------------------------------------------------------------------------
-   private:
-    //------------------------------------------------------------------------------
-    unsigned char get_host_encoding() const {
-        static const int tmp = 1;
-        if (1 == *(char*)&tmp) {
-            return ELFDATA2LSB;
-        } else {
-            return ELFDATA2MSB;
-        }
-    }
-
-    //------------------------------------------------------------------------------
-   private:
-    bool need_conversion;
-};
-
-
-//------------------------------------------------------------------------------
-inline uint32_t elf_hash(const unsigned char* name) {
-    uint32_t h = 0, g;
-    while (*name) {
-        h = (h << 4) + *name++;
-        g = h & 0xf0000000;
-        if (g != 0) h ^= g >> 24;
-        h &= ~g;
-    }
-    return h;
-}
-
-}  // namespace ELFIO
-
-#endif  // ELFIO_UTILS_HPP
diff --git a/third_party/rocm/include/hip/hcc_detail/functional_grid_launch.hpp b/third_party/rocm/include/hip/hcc_detail/functional_grid_launch.hpp
deleted file mode 100644
index efe6a60..0000000
--- a/third_party/rocm/include/hip/hcc_detail/functional_grid_launch.hpp
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "concepts.hpp"
-#include "helpers.hpp"
-#include "program_state.hpp"
-#include "hip_runtime_api.h"
-
-#include <cstdint>
-#include <cstring>
-#include <stdexcept>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-
-hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
-                                               unsigned int flags, hip_impl::program_state& ps);
-
-hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim,
-                                    dim3 blockDim, void** args,
-                                    size_t sharedMem, hipStream_t stream,
-                                    hip_impl::program_state& ps);
-
-hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
-                                                 int  numDevices,
-                                                 unsigned int flags,
-                                                 hip_impl::program_state& ps);
-
-#pragma GCC visibility push(hidden)
-
-namespace hip_impl {
-template <typename T, typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
-inline T round_up_to_next_multiple_nonnegative(T x, T y) {
-    T tmp = x + y - 1;
-    return tmp - tmp % y;
-}
-
-template <
-    std::size_t n,
-    typename... Ts,
-    typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
-inline hip_impl::kernarg make_kernarg(
-    const std::tuple<Ts...>&,
-    const kernargs_size_align&,
-    hip_impl::kernarg kernarg) {
-    return kernarg;
-}
-
-template <
-    std::size_t n,
-    typename... Ts,
-    typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
-inline hip_impl::kernarg make_kernarg(
-    const std::tuple<Ts...>& formals,
-    const kernargs_size_align& size_align,
-    hip_impl::kernarg kernarg) {
-    using T = typename std::tuple_element<n, std::tuple<Ts...>>::type;
-
-    static_assert(
-        !std::is_reference<T>{},
-        "A __global__ function cannot have a reference as one of its "
-            "arguments.");
-    #if defined(HIP_STRICT)
-        static_assert(
-            std::is_trivially_copyable<T>{},
-            "Only TriviallyCopyable types can be arguments to a __global__ "
-                "function");
-    #endif
-
-    kernarg.resize(round_up_to_next_multiple_nonnegative(
-        kernarg.size(), size_align.alignment(n)) + size_align.size(n));
-
-    std::memcpy(
-        kernarg.data() + kernarg.size() - size_align.size(n),
-        &std::get<n>(formals),
-        size_align.size(n));
-    return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
-}
-
-template <typename... Formals, typename... Actuals>
-inline hip_impl::kernarg make_kernarg(
-    void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
-    static_assert(sizeof...(Formals) == sizeof...(Actuals),
-        "The count of formal arguments must match the count of actuals.");
-
-    if (sizeof...(Formals) == 0) return {};
-
-    std::tuple<Formals...> to_formals{std::move(actuals)};
-    hip_impl::kernarg kernarg;
-    kernarg.reserve(sizeof(to_formals));
-
-    auto& ps = hip_impl::get_program_state();
-    return make_kernarg<0>(to_formals, 
-                           ps.get_kernargs_size_align(
-                               reinterpret_cast<std::uintptr_t>(kernel)),
-                           std::move(kernarg));
-}
-
-
-HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(hipStream_t stream);
-
-inline
-__attribute__((visibility("hidden")))
-void hipLaunchKernelGGLImpl(
-    std::uintptr_t function_address,
-    const dim3& numBlocks,
-    const dim3& dimBlocks,
-    std::uint32_t sharedMemBytes,
-    hipStream_t stream,
-    void** kernarg) {
-
-    const auto& kd = hip_impl::get_program_state().kernel_descriptor(function_address, 
-                                                               target_agent(stream));
-
-    hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z,
-                          dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
-                          stream, nullptr, kernarg);
-}
-} // Namespace hip_impl.
-
-
-template <class T>
-inline
-hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
-    T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) {
-
-    using namespace hip_impl;
-
-    hip_impl::hip_init();
-    auto f = get_program_state().kernel_descriptor(reinterpret_cast<std::uintptr_t>(kernel),
-                                                   target_agent(0));
-
-    return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
-                                      dynSharedMemPerBlk, blockSizeLimit);
-}
-
-template <class T>
-inline
-hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
-    T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int  flags = 0 ) {
-
-    using namespace hip_impl;
-
-    hip_impl::hip_init();
-    if(flags != hipOccupancyDefault) return hipErrorNotSupported;
-    auto f = get_program_state().kernel_descriptor(reinterpret_cast<std::uintptr_t>(kernel),
-                                                   target_agent(0));
-
-    return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
-                                      dynSharedMemPerBlk, blockSizeLimit);
-}
-
-template <typename... Args, typename F = void (*)(Args...)>
-inline
-void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-                        std::uint32_t sharedMemBytes, hipStream_t stream,
-                        Args... args) {
-    hip_impl::hip_init();
-    auto kernarg = hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
-    std::size_t kernarg_size = kernarg.size();
-
-    void* config[]{
-        HIP_LAUNCH_PARAM_BUFFER_POINTER,
-        kernarg.data(),
-        HIP_LAUNCH_PARAM_BUFFER_SIZE,
-        &kernarg_size,
-        HIP_LAUNCH_PARAM_END};
-
-    hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast<std::uintptr_t>(kernel),
-                                     numBlocks, dimBlocks, sharedMemBytes,
-                                     stream, &config[0]);
-}
-
-template <typename F>
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim,
-                                      void** args, size_t sharedMem,
-                                      hipStream_t stream) {
-    hip_impl::hip_init();
-    auto& ps = hip_impl::get_program_state();
-    return hipLaunchCooperativeKernel(reinterpret_cast<void*>(f), gridDim,
-                                      blockDim, args, sharedMem, stream, ps);
-}
-
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
-                                                 int  numDevices,
-                                                 unsigned int  flags) {
-
-    hip_impl::hip_init();
-    auto& ps = hip_impl::get_program_state();
-    return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps);
-}
-
-#pragma GCC visibility pop
diff --git a/third_party/rocm/include/hip/hcc_detail/grid_launch.h b/third_party/rocm/include/hip/hcc_detail/grid_launch.h
deleted file mode 100644
index 22841a5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/grid_launch.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-#include <hc_defines.h>
-
-#define GRID_LAUNCH_VERSION 20
-
-// Extern definitions
-namespace hc{
-class completion_future;
-class accelerator_view;
-}
-
-
-// 3 dim structure for groups and grids.
-typedef struct gl_dim3
-{
-  int x,y,z;
-  gl_dim3(uint32_t _x=1, uint32_t _y=1, uint32_t _z=1) : x(_x), y(_y), z(_z) {};
-} gl_dim3;
-
-typedef enum gl_barrier_bit {
-    barrier_bit_queue_default,
-    barrier_bit_none,
-    barrier_bit_wait,
-} gl_barrier_bit;
-
-
-// grid_launch_parm contains information used to launch the kernel.
-typedef struct grid_launch_parm
-{
-  //! Grid dimensions
-  gl_dim3      grid_dim;
-
-  //! Group dimensions
-  gl_dim3      group_dim;
-
-  //! Amount of dynamic group memory to use with the kernel launch.
-  //! This memory is in addition to the amount used statically in the kernel.
-  unsigned int  dynamic_group_mem_bytes;
-
-  //! Control setting of barrier bit on per-packet basis:
-  //! See gl_barrier_bit description.  
-  //! Placeholder, is not used to control packet dispatch yet
-  enum gl_barrier_bit barrier_bit;
-
-  //! Value of packet fences to apply to launch.
-  //! The correspond to the value of bits 9:14 in the AQL packet,
-  //! see HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE and hsa_fence_scope_t.
-  unsigned int  launch_fence;
-
-  //! Pointer to the accelerator_view where the kernel should execute.
-  //! If NULL, the default view on the default accelerator is used.
-  hc::accelerator_view  *av;
-
-  //! Pointer to the completion_future used to track the status of the command.
-  //! If NULL, the command does not write status.  In this case, 
-  //! synchronization can be enforced with queue-level waits or 
-  //! waiting on younger commands.
-  hc::completion_future *cf;
-
-  grid_launch_parm() = default;
-} grid_launch_parm;
-
-
-extern void init_grid_launch(grid_launch_parm *gl);
diff --git a/third_party/rocm/include/hip/hcc_detail/grid_launch.hpp b/third_party/rocm/include/hip/hcc_detail/grid_launch.hpp
deleted file mode 100644
index 04ce7e0..0000000
--- a/third_party/rocm/include/hip/hcc_detail/grid_launch.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#pragma once
-
-#include "grid_launch.h"
-#include "hc.hpp"
-
-class grid_launch_parm_cxx : public grid_launch_parm
-{
-public:
-  grid_launch_parm_cxx() = default;
-
-  // customized serialization: don't need av and cf in kernel
-  __attribute__((annotate("serialize")))
-  void __cxxamp_serialize(Kalmar::Serialize& s) const {
-    s.Append(sizeof(int), &grid_dim.x);
-    s.Append(sizeof(int), &grid_dim.y);
-    s.Append(sizeof(int), &grid_dim.z);
-    s.Append(sizeof(int), &group_dim.x);
-    s.Append(sizeof(int), &group_dim.y);
-    s.Append(sizeof(int), &group_dim.z);
-  }
-
-  __attribute__((annotate("user_deserialize")))
-  grid_launch_parm_cxx(int grid_dim_x,  int grid_dim_y,  int grid_dim_z,
-                   int group_dim_x, int group_dim_y, int group_dim_z) {
-    grid_dim.x  = grid_dim_x;
-    grid_dim.y  = grid_dim_y;
-    grid_dim.z  = grid_dim_z;
-    group_dim.x = group_dim_x;
-    group_dim.y = group_dim_y;
-    group_dim.z = group_dim_z;
-  }
-};
-
-
-extern inline void grid_launch_init(grid_launch_parm *lp) {
-  lp->grid_dim.x = lp->grid_dim.y = lp->grid_dim.z = 1;
-
-  lp->group_dim.x = lp->group_dim.y = lp->group_dim.z = 1;
-
-  lp->dynamic_group_mem_bytes = 0;
-
-  lp->barrier_bit = barrier_bit_queue_default;
-  lp->launch_fence = -1;
-
-  // TODO - set to NULL?
-  static hc::accelerator_view av = hc::accelerator().get_default_view();
-  lp->av = &av;
-  lp->cf = NULL;
-}
-
diff --git a/third_party/rocm/include/hip/hcc_detail/grid_launch_GGL.hpp b/third_party/rocm/include/hip/hcc_detail/grid_launch_GGL.hpp
deleted file mode 100644
index 1c05279..0000000
--- a/third_party/rocm/include/hip/hcc_detail/grid_launch_GGL.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#pragma once
-
-#if GENERIC_GRID_LAUNCH == 1
-#if __hcc_workweek__ >= 17481
-#include "functional_grid_launch.hpp"
-#else
-#include "macro_based_grid_launch.hpp"
-#endif
-#endif  // GENERIC_GRID_LAUNCH
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/hcc_detail/helpers.hpp b/third_party/rocm/include/hip/hcc_detail/helpers.hpp
deleted file mode 100644
index b94b126..0000000
--- a/third_party/rocm/include/hip/hcc_detail/helpers.hpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-#include "concepts.hpp"
-
-#include <type_traits>  // For std::conditional, std::decay, std::enable_if,
-                        // std::false_type, std result_of and std::true_type.
-#include <utility>      // For std::declval.
-
-#ifdef __has_include                      // Check if __has_include is present
-#  if __has_include(<version>)            // Check for version header
-#    include <version>
-#    if defined(__cpp_lib_is_invocable) && !defined(HIP_HAS_INVOCABLE)
-#       define HIP_HAS_INVOCABLE __cpp_lib_is_invocable
-#    endif
-#    if defined(__cpp_lib_result_of_sfinae) && !defined(HIP_HAS_RESULT_OF_SFINAE)
-#       define HIP_HAS_RESULT_OF_SFINAE __cpp_lib_result_of_sfinae
-#    endif
-#  endif
-#endif
-
-#ifndef HIP_HAS_INVOCABLE
-#define HIP_HAS_INVOCABLE 0
-#endif
-
-#ifndef HIP_HAS_RESULT_OF_SFINAE
-#define HIP_HAS_RESULT_OF_SFINAE 0
-#endif
-
-namespace std {  // TODO: these should be removed as soon as possible.
-#if (__cplusplus < 201406L)
-#if (__cplusplus < 201402L)
-template <bool cond, typename T = void>
-using enable_if_t = typename enable_if<cond, T>::type;
-template <bool cond, typename T, typename U>
-using conditional_t = typename conditional<cond, T, U>::type;
-template <typename T>
-using decay_t = typename decay<T>::type;
-template <FunctionalProcedure F, typename... Ts>
-using result_of_t = typename result_of<F(Ts...)>::type;
-template <typename T>
-using remove_reference_t = typename remove_reference<T>::type;
-#endif
-#endif
-}  // namespace std
-
-namespace hip_impl {
-template <typename...>
-using void_t_ = void;
-
-#if HIP_HAS_INVOCABLE
-template <typename, typename = void>
-struct is_callable_impl;
-
-template <FunctionalProcedure F, typename... Ts>
-struct is_callable_impl<F(Ts...)> : std::is_invocable<F, Ts...> {};
-#elif HIP_HAS_RESULT_OF_SFINAE
-template <typename, typename = void>
-struct is_callable_impl : std::false_type {};
-
-template <FunctionalProcedure F, typename... Ts>
-struct is_callable_impl<F(Ts...), void_t_<typename std::result_of<F(Ts...)>::type > > : std::true_type {};
-#else
-template <class Base, class T, class Derived>
-auto simple_invoke(T Base::*pmd, Derived&& ref)
--> decltype(static_cast<Derived&&>(ref).*pmd);
- 
-template <class PMD, class Pointer>
-auto simple_invoke(PMD&& pmd, Pointer&& ptr)
--> decltype((*static_cast<Pointer&&>(ptr)).*static_cast<PMD&&>(pmd));
-
-template <class Base, class T, class Derived>
-auto simple_invoke(T Base::*pmd, const std::reference_wrapper<Derived>& ref)
--> decltype(ref.get().*pmd);
- 
-template <class Base, class T, class Derived, class... Args>
-auto simple_invoke(T Base::*pmf, Derived&& ref, Args&&... args)
--> decltype((static_cast<Derived&&>(ref).*pmf)(static_cast<Args&&>(args)...));
- 
-template <class PMF, class Pointer, class... Args>
-auto simple_invoke(PMF&& pmf, Pointer&& ptr, Args&&... args)
--> decltype(((*static_cast<Pointer&&>(ptr)).*static_cast<PMF&&>(pmf))(static_cast<Args&&>(args)...));
-
-template <class Base, class T, class Derived, class... Args>
-auto simple_invoke(T Base::*pmf, const std::reference_wrapper<Derived>& ref, Args&&... args)
--> decltype((ref.get().*pmf)(static_cast<Args&&>(args)...));
-
-template<class F, class... Ts>
-auto simple_invoke(F&& f, Ts&&... xs) 
--> decltype(f(static_cast<Ts&&>(xs)...));
-
-template <typename, typename = void>
-struct is_callable_impl : std::false_type {};
-
-template <FunctionalProcedure F, typename... Ts>
-struct is_callable_impl<F(Ts...), void_t_<decltype(simple_invoke(std::declval<F>(), std::declval<Ts>()...))> >
-    : std::true_type {};
-
-#endif
-
-template <typename Call>
-struct is_callable : is_callable_impl<Call> {};
-
-#define count_macro_args_impl_hip_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13,     \
-                                   _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25,     \
-                                   _26, _27, _28, _29, _30, _31, _n, ...)                          \
-    _n
-#define count_macro_args_hip_(...)                                                                 \
-    count_macro_args_impl_hip_(, ##__VA_ARGS__, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20,    \
-                               19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,  \
-                               0)
-
-#define overloaded_macro_expand_hip_(macro, arg_cnt) macro##arg_cnt
-#define overload_macro_impl_hip_(macro, arg_cnt) overloaded_macro_expand_hip_(macro, arg_cnt)
-#define overload_macro_hip_(macro, ...)                                                            \
-    overload_macro_impl_hip_(macro, count_macro_args_hip_(__VA_ARGS__))(__VA_ARGS__)
-}  // namespace hip_impl
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_atomic.h b/third_party/rocm/include/hip/hcc_detail/hip_atomic.h
deleted file mode 100644
index a1370ce..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_atomic.h
+++ /dev/null
@@ -1,286 +0,0 @@
-#pragma once
-
-#include "device_functions.h"
-
-__device__
-inline
-int atomicCAS(int* address, int compare, int val)
-{
-    __atomic_compare_exchange_n(
-        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-
-    return compare;
-}
-__device__
-inline
-unsigned int atomicCAS(
-    unsigned int* address, unsigned int compare, unsigned int val)
-{
-    __atomic_compare_exchange_n(
-        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-
-    return compare;
-}
-__device__
-inline
-unsigned long long atomicCAS(
-    unsigned long long* address,
-    unsigned long long compare,
-    unsigned long long val)
-{
-    __atomic_compare_exchange_n(
-        address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-
-    return compare;
-}
-
-__device__
-inline
-int atomicAdd(int* address, int val)
-{
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicAdd(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicAdd(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-float atomicAdd(float* address, float val)
-{
-    return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-}
-
-DEPRECATED("use atomicAdd instead")
-__device__
-inline
-void atomicAddNoRet(float* address, float val)
-{
-    __ockl_atomic_add_noret_f32(address, val);
-}
-
-__device__
-inline
-double atomicAdd(double* address, double val)
-{
-    unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
-    unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
-
-    unsigned long long old;
-    do {
-        old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
-
-        if (r != old) { r = old; continue; }
-
-        r = atomicCAS(
-            uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
-
-        if (r == old) break;
-    } while (true);
-
-    return __longlong_as_double(r);
-}
-
-__device__
-inline
-int atomicSub(int* address, int val)
-{
-    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicSub(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-}
-
-__device__
-inline
-int atomicExch(int* address, int val)
-{
-    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicExch(unsigned int* address, unsigned int val)
-{
-    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicExch(unsigned long long* address, unsigned long long val)
-{
-    return __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-float atomicExch(float* address, float val)
-{
-    return __uint_as_float(__atomic_exchange_n(
-        reinterpret_cast<unsigned int*>(address),
-        __float_as_uint(val),
-        __ATOMIC_RELAXED));
-}
-
-__device__
-inline
-int atomicMin(int* address, int val)
-{
-    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicMin(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicMin(
-    unsigned long long* address, unsigned long long val)
-{
-    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (val < tmp) {
-        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
-
-        if (tmp1 != tmp) { tmp = tmp1; continue; }
-
-        tmp = atomicCAS(address, tmp, val);
-    }
-
-    return tmp;
-}
-
-__device__
-inline
-int atomicMax(int* address, int val)
-{
-    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicMax(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicMax(
-    unsigned long long* address, unsigned long long val)
-{
-    unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
-    while (tmp < val) {
-        const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
-
-        if (tmp1 != tmp) { tmp = tmp1; continue; }
-
-        tmp = atomicCAS(address, tmp, val);
-    }
-
-    return tmp;
-}
-
-__device__
-inline
-unsigned int atomicInc(unsigned int* address, unsigned int val)
-{
-    __device__
-    extern
-    unsigned int __builtin_amdgcn_atomic_inc(
-        unsigned int*,
-        unsigned int,
-        unsigned int,
-        unsigned int,
-        bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");
-
-    return __builtin_amdgcn_atomic_inc(
-        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
-}
-
-__device__
-inline
-unsigned int atomicDec(unsigned int* address, unsigned int val)
-{
-    __device__
-    extern
-    unsigned int __builtin_amdgcn_atomic_dec(
-        unsigned int*,
-        unsigned int,
-        unsigned int,
-        unsigned int,
-        bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");
-
-    return __builtin_amdgcn_atomic_dec(
-        address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
-}
-
-__device__
-inline
-int atomicAnd(int* address, int val)
-{
-    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicAnd(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicAnd(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-}
-
-__device__
-inline
-int atomicOr(int* address, int val)
-{
-    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicOr(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicOr(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-}
-
-__device__
-inline
-int atomicXor(int* address, int val)
-{
-    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned int atomicXor(unsigned int* address, unsigned int val)
-{
-    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-}
-__device__
-inline
-unsigned long long atomicXor(
-    unsigned long long* address, unsigned long long val)
-{
-    return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-}
-
-// TODO: add scoped atomics i.e. atomic{*}_system && atomic{*}_block.
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_common.h b/third_party/rocm/include/hip/hcc_detail/hip_common.h
deleted file mode 100644
index 2e2abac..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_common.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMMON_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMMON_H
-
-#if defined(__HCC__)
-#define __HCC_OR_HIP_CLANG__ 1
-#define __HCC_ONLY__ 1
-#define __HIP_CLANG_ONLY__ 0
-#elif defined(__clang__) && defined(__HIP__)
-#define __HCC_OR_HIP_CLANG__ 1
-#define __HCC_ONLY__ 0
-#define __HIP_CLANG_ONLY__ 1
-#else
-#define __HCC_OR_HIP_CLANG__ 0
-#define __HCC_ONLY__ 0
-#define __HIP_CLANG_ONLY__ 0
-#endif
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMMON_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_complex.h b/third_party/rocm/include/hip/hcc_detail/hip_complex.h
deleted file mode 100644
index 11648ce..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_complex.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H
-
-#include "hip/hcc_detail/hip_vector_types.h"
-
-// TODO: Clang has a bug which allows device functions to call std functions
-// when std functions are introduced into default namespace by using statement.
-// math.h may be included after this bug is fixed.
-#if __cplusplus
-#include <cmath>
-#else
-#include "math.h"
-#endif
-
-#if __cplusplus
-#define COMPLEX_NEG_OP_OVERLOAD(type)                                                              \
-    __device__ __host__ static inline type operator-(const type& op) {                             \
-        type ret;                                                                                  \
-        ret.x = -op.x;                                                                             \
-        ret.y = -op.y;                                                                             \
-        return ret;                                                                                \
-    }
-
-#define COMPLEX_EQ_OP_OVERLOAD(type)                                                               \
-    __device__ __host__ static inline bool operator==(const type& lhs, const type& rhs) {          \
-        return lhs.x == rhs.x && lhs.y == rhs.y;                                                   \
-    }
-
-#define COMPLEX_NE_OP_OVERLOAD(type)                                                               \
-    __device__ __host__ static inline bool operator!=(const type& lhs, const type& rhs) {          \
-        return !(lhs == rhs);                                                                      \
-    }
-
-#define COMPLEX_ADD_OP_OVERLOAD(type)                                                              \
-    __device__ __host__ static inline type operator+(const type& lhs, const type& rhs) {           \
-        type ret;                                                                                  \
-        ret.x = lhs.x + rhs.x;                                                                     \
-        ret.y = lhs.y + rhs.y;                                                                     \
-        return ret;                                                                                \
-    }
-
-#define COMPLEX_SUB_OP_OVERLOAD(type)                                                              \
-    __device__ __host__ static inline type operator-(const type& lhs, const type& rhs) {           \
-        type ret;                                                                                  \
-        ret.x = lhs.x - rhs.x;                                                                     \
-        ret.y = lhs.y - rhs.y;                                                                     \
-        return ret;                                                                                \
-    }
-
-#define COMPLEX_MUL_OP_OVERLOAD(type)                                                              \
-    __device__ __host__ static inline type operator*(const type& lhs, const type& rhs) {           \
-        type ret;                                                                                  \
-        ret.x = lhs.x * rhs.x - lhs.y * rhs.y;                                                     \
-        ret.y = lhs.x * rhs.y + lhs.y * rhs.x;                                                     \
-        return ret;                                                                                \
-    }
-
-#define COMPLEX_DIV_OP_OVERLOAD(type)                                                              \
-    __device__ __host__ static inline type operator/(const type& lhs, const type& rhs) {           \
-        type ret;                                                                                  \
-        ret.x = (lhs.x * rhs.x + lhs.y * rhs.y);                                                   \
-        ret.y = (rhs.x * lhs.y - lhs.x * rhs.y);                                                   \
-        ret.x = ret.x / (rhs.x * rhs.x + rhs.y * rhs.y);                                           \
-        ret.y = ret.y / (rhs.x * rhs.x + rhs.y * rhs.y);                                           \
-        return ret;                                                                                \
-    }
-
-#define COMPLEX_ADD_PREOP_OVERLOAD(type)                                                           \
-    __device__ __host__ static inline type& operator+=(type& lhs, const type& rhs) {               \
-        lhs.x += rhs.x;                                                                            \
-        lhs.y += rhs.y;                                                                            \
-        return lhs;                                                                                \
-    }
-
-#define COMPLEX_SUB_PREOP_OVERLOAD(type)                                                           \
-    __device__ __host__ static inline type& operator-=(type& lhs, const type& rhs) {               \
-        lhs.x -= rhs.x;                                                                            \
-        lhs.y -= rhs.y;                                                                            \
-        return lhs;                                                                                \
-    }
-
-#define COMPLEX_MUL_PREOP_OVERLOAD(type)                                                           \
-    __device__ __host__ static inline type& operator*=(type& lhs, const type& rhs) {               \
-        lhs = lhs * rhs;                                                                           \
-        return lhs;                                                                                \
-    }
-
-#define COMPLEX_DIV_PREOP_OVERLOAD(type)                                                           \
-    __device__ __host__ static inline type& operator/=(type& lhs, const type& rhs) {               \
-        lhs = lhs / rhs;                                                                           \
-        return lhs;                                                                                \
-    }
-
-#define COMPLEX_SCALAR_PRODUCT(type, type1)                                                        \
-    __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) {                 \
-        type ret;                                                                                  \
-        ret.x = lhs.x * rhs;                                                                       \
-        ret.y = lhs.y * rhs;                                                                       \
-        return ret;                                                                                \
-    }
-
-#endif
-
-typedef float2 hipFloatComplex;
-
-__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; }
-
-__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; }
-
-__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
-    hipFloatComplex z;
-    z.x = a;
-    z.y = b;
-    return z;
-}
-
-__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
-    hipFloatComplex ret;
-    ret.x = z.x;
-    ret.y = -z.y;
-    return ret;
-}
-
-__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) {
-    return z.x * z.x + z.y * z.y;
-}
-
-__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
-    return make_hipFloatComplex(p.x + q.x, p.y + q.y);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
-    return make_hipFloatComplex(p.x - q.x, p.y - q.y);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
-    return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
-    float sqabs = hipCsqabsf(q);
-    hipFloatComplex ret;
-    ret.x = (p.x * q.x + p.y * q.y) / sqabs;
-    ret.y = (p.y * q.x - p.x * q.y) / sqabs;
-    return ret;
-}
-
-__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); }
-
-
-typedef double2 hipDoubleComplex;
-
-__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; }
-
-__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; }
-
-__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
-    hipDoubleComplex z;
-    z.x = a;
-    z.y = b;
-    return z;
-}
-
-__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
-    hipDoubleComplex ret;
-    ret.x = z.x;
-    ret.y = -z.y;
-    return ret;
-}
-
-__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) {
-    return z.x * z.x + z.y * z.y;
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
-    return make_hipDoubleComplex(p.x + q.x, p.y + q.y);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
-    return make_hipDoubleComplex(p.x - q.x, p.y - q.y);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
-    return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
-    double sqabs = hipCsqabs(q);
-    hipDoubleComplex ret;
-    ret.x = (p.x * q.x + p.y * q.y) / sqabs;
-    ret.y = (p.y * q.x - p.x * q.y) / sqabs;
-    return ret;
-}
-
-__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrtf(hipCsqabs(z)); }
-
-
-#if __cplusplus
-
-COMPLEX_NEG_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_EQ_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_NE_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_ADD_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_SUB_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_MUL_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_DIV_OP_OVERLOAD(hipFloatComplex)
-COMPLEX_ADD_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_SUB_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_MUL_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_DIV_PREOP_OVERLOAD(hipFloatComplex)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned short)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed short)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned int)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed int)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, float)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, double)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long long)
-COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long long)
-
-COMPLEX_NEG_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_EQ_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_NE_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_ADD_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_SUB_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_MUL_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_DIV_OP_OVERLOAD(hipDoubleComplex)
-COMPLEX_ADD_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_SUB_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_MUL_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_DIV_PREOP_OVERLOAD(hipDoubleComplex)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned short)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed short)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned int)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed int)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, float)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, double)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long long)
-COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long)
-
-#endif
-
-
-typedef hipFloatComplex hipComplex;
-
-__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) {
-    return make_hipFloatComplex(x, y);
-}
-
-__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
-    return make_hipFloatComplex((float)z.x, (float)z.y);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
-    return make_hipDoubleComplex((double)z.x, (double)z.y);
-}
-
-__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
-    float real = (p.x * q.x) + r.x;
-    float imag = (q.x * p.y) + r.y;
-
-    real = -(p.y * q.y) + real;
-    imag = (p.x * q.y) + imag;
-
-    return make_hipComplex(real, imag);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
-                                                           hipDoubleComplex r) {
-    double real = (p.x * q.x) + r.x;
-    double imag = (q.x * p.y) + r.y;
-
-    real = -(p.y * q.y) + real;
-    imag = (p.x * q.y) + imag;
-
-    return make_hipDoubleComplex(real, imag);
-}
-
-#endif //HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups.h b/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups.h
deleted file mode 100644
index 353bdc5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/hip_cooperative_groups.h
- *
- *  @brief Device side implementation of `Cooperative Group` feature.
- *
- *  Defines new types and device API wrappers related to `Cooperative Group`
- *  feature, which the programmer can directly use in his kernel(s) in order to
- *  make use of this feature.
- */
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-
-#if __cplusplus
-#include <hip/hcc_detail/hip_cooperative_groups_helper.h>
-
-namespace cooperative_groups {
-
-/** \brief The base type of all cooperative group types
- *
- *  \details Holds the key properties of a constructed cooperative group type
- *           object, like the group type, its size, etc
- */
-class thread_group {
- protected:
-  uint32_t _type; // thread_group type
-  uint32_t _size; // total number of threads in the tread_group
-  uint64_t _mask; // Lanemask for coalesced and tiled partitioned group types,
-                  // LSB represents lane 0, and MSB represents lane 63
-
-  // Construct a thread group, and set thread group type and other essential
-  // thread group properties. This generic thread group is directly constructed
-  // only when the group is supposed to contain only the calling the thread
-  // (throurh the API - `this_thread()`), and in all other cases, this thread
-  // group object is a sub-object of some other derived thread group object
-  __CG_QUALIFIER__ thread_group(internal::group_type type, uint32_t size,
-                                uint64_t mask = (uint64_t)0) {
-    _type = type;
-    _size = size;
-    _mask = mask;
-  }
-
- public:
-  // Total number of threads in the thread group, and this serves the purpose
-  // for all derived cooperative group types since their `size` is directly
-  // saved during the construction
-  __CG_QUALIFIER__ uint32_t size() const {
-    return _size;
-  }
-  // Rank of the calling thread within [0, size())
-  __CG_QUALIFIER__ uint32_t thread_rank() const;
-  // Is this cooperative group type valid?
-  __CG_QUALIFIER__ bool is_valid() const;
-  // synchronize the threads in the thread group
-  __CG_QUALIFIER__ void sync() const;
-};
-
-/** \brief The multi-grid cooperative group type
- *
- *  \details Represents an inter-device cooperative group type where the
- *           participating threads within the group spans across multple
- *           devices, running the (same) kernel on these devices
- */
-class multi_grid_group : public thread_group {
-  // Only these friend functions are allowed to construct an object of this class
-  // and access its resources
-  friend __CG_QUALIFIER__ multi_grid_group this_multi_grid();
-
- protected:
-  // Construct mutli-grid thread group (through the API this_multi_grid())
-  explicit __CG_QUALIFIER__ multi_grid_group(uint32_t size)
-      : thread_group(internal::cg_multi_grid, size) { }
-
- public:
-  // Number of invocations participating in this multi-grid group. In other
-  // words, the number of GPUs
-	__CG_QUALIFIER__ uint32_t num_grids() {
-    return internal::multi_grid::num_grids();
-  }
-  // Rank of this invocation. In other words, an ID number within the range
-  // [0, num_grids()) of the GPU, this kernel is running on
-	__CG_QUALIFIER__ uint32_t grid_rank() {
-    return internal::multi_grid::grid_rank();
-  }
-  __CG_QUALIFIER__ uint32_t thread_rank() const {
-    return internal::multi_grid::thread_rank();
-  }
-  __CG_QUALIFIER__ bool is_valid() const {
-    return internal::multi_grid::is_valid();
-  }
-  __CG_QUALIFIER__ void sync() const {
-    internal::multi_grid::sync();
-  }
-};
-
-/** \brief User exposed API interface to construct multi-grid cooperative
- *         group type object - `multi_grid_group`
- *
- *  \details User is not allowed to directly construct an object of type
- *           `multi_grid_group`. Instead, he should construct it through this
- *           API function
- */
-__CG_QUALIFIER__ multi_grid_group
-this_multi_grid() {
-  return multi_grid_group(internal::multi_grid::size());
-}
-
-/** \brief The grid cooperative group type
- *
- *  \details Represents an inter-workgroup cooperative group type where the
- *           participating threads within the group spans across multiple
- *           workgroups running the (same) kernel on the same device
- */
-class grid_group : public thread_group {
-  // Only these friend functions are allowed to construct an object of this class
-  // and access its resources
-  friend __CG_QUALIFIER__ grid_group this_grid();
-
- protected:
-  // Construct grid thread group (through the API this_grid())
-  explicit __CG_QUALIFIER__ grid_group(uint32_t size)
-      : thread_group(internal::cg_grid, size) { }
-
- public:
-  __CG_QUALIFIER__ uint32_t thread_rank() const {
-    return internal::grid::thread_rank();
-  }
-  __CG_QUALIFIER__ bool is_valid() const {
-    return internal::grid::is_valid();
-  }
-  __CG_QUALIFIER__ void sync() const {
-    internal::grid::sync();
-  }
-};
-
-/** \brief User exposed API interface to construct grid cooperative group type
- *         object - `grid_group`
- *
- *  \details User is not allowed to directly construct an object of type
- *           `multi_grid_group`. Instead, he should construct it through this
- *           API function
- */
-__CG_QUALIFIER__ grid_group
-this_grid() {
-  return grid_group(internal::grid::size());
-}
-
-/** \brief The workgroup (thread-block in CUDA terminology) cooperative group
- *         type
- *
- *  \details Represents an intra-workgroup cooperative group type where the
- *           participating threads within the group are exctly the same threads
- *           which are participated in the currently executing `workgroup`
- */
-class thread_block : public thread_group {
-  // Only these friend functions are allowed to construct an object of this
-  // class and access its resources
-  friend __CG_QUALIFIER__ thread_block this_thread_block();
-
- protected:
-  // Construct a workgroup thread group (through the API this_thread_block())
-  explicit __CG_QUALIFIER__ thread_block(uint32_t size)
-      : thread_group(internal::cg_workgroup, size) { }
-
- public:
-  // 3-dimensional block index within the grid
-  __CG_QUALIFIER__ dim3 group_index() {
-    return internal::workgroup::group_index();
-  }
-  // 3-dimensional thread index within the block
-  __CG_QUALIFIER__ dim3 thread_index() {
-    return internal::workgroup::thread_index();
-  }
-  __CG_QUALIFIER__ uint32_t thread_rank() const {
-    return internal::workgroup::thread_rank();
-  }
-  __CG_QUALIFIER__ bool is_valid() const {
-    return internal::workgroup::is_valid();
-  }
-  __CG_QUALIFIER__ void sync() const {
-    internal::workgroup::sync();
-  }
-};
-
-/** \brief User exposed API interface to construct workgroup cooperative
- *         group type object - `thread_block`
- *
- *  \details User is not allowed to directly construct an object of type
- *           `thread_block`. Instead, he should construct it through this API
- *           function
- */
-__CG_QUALIFIER__ thread_block
-this_thread_block() {
-  return thread_block(internal::workgroup::size());
-}
-
-/**
- *  Implemenation of all publicly exposed base class APIs
- */
-__CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
-  switch (this->_type) {
-    case internal::cg_multi_grid: {
-      return (static_cast<const multi_grid_group*>(this)->thread_rank());
-    }
-    case internal::cg_grid: {
-      return (static_cast<const grid_group*>(this)->thread_rank());
-    }
-    case internal::cg_workgroup: {
-      return (static_cast<const thread_block*>(this)->thread_rank());
-    }
-    default: {
-      assert(false && "invalid cooperative group type");
-      return -1;
-    }
-  }
-}
-
-__CG_QUALIFIER__ bool thread_group::is_valid() const {
-  switch (this->_type) {
-    case internal::cg_multi_grid: {
-      return (static_cast<const multi_grid_group*>(this)->is_valid());
-    }
-    case internal::cg_grid: {
-      return (static_cast<const grid_group*>(this)->is_valid());
-    }
-    case internal::cg_workgroup: {
-      return (static_cast<const thread_block*>(this)->is_valid());
-    }
-    default: {
-      assert(false && "invalid cooperative group type");
-      return false;
-    }
-  }
-}
-
-__CG_QUALIFIER__ void thread_group::sync() const {
-  switch (this->_type) {
-    case internal::cg_multi_grid: {
-      static_cast<const multi_grid_group*>(this)->sync();
-      break;
-    }
-    case internal::cg_grid: {
-      static_cast<const grid_group*>(this)->sync();
-      break;
-    }
-    case internal::cg_workgroup: {
-      static_cast<const thread_block*>(this)->sync();
-      break;
-    }
-    default: {
-      assert(false && "invalid cooperative group type");
-    }
-  }
-}
-
-/**
- *  Implemenation of publicly exposed `wrapper` APIs on top of basic cooperative
- *  group type APIs
- */
-template <class CGTy>
-__CG_QUALIFIER__ uint32_t group_size(CGTy const &g) {
-  return g.size();
-}
-
-template <class CGTy>
-__CG_QUALIFIER__ uint32_t thread_rank(CGTy const &g) {
-  return g.thread_rank();
-}
-
-template <class CGTy>
-__CG_QUALIFIER__ bool is_valid(CGTy const &g) {
-  return g.is_valid();
-}
-
-template <class CGTy>
-__CG_QUALIFIER__ void sync(CGTy const &g) {
-  g.sync();
-}
-
-} // namespace cooperative_groups
-
-#endif // __cplusplus
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups_helper.h b/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups_helper.h
deleted file mode 100644
index 4e10c0d..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_cooperative_groups_helper.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/hip_cooperative_groups_helper.h
- *
- *  @brief Device side implementation of cooperative group feature.
- *
- *  Defines helper constructs and APIs which aid the types and device API
- *  wrappers defined within `hcc_detail/hip_cooperative_groups.h`.
- */
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
-
-#if __cplusplus
-#include <hip/hcc_detail/hip_runtime_api.h>
-#include <hip/hcc_detail/device_functions.h>
-
-#if !defined(__align__)
-#define __align__(x) __attribute__((aligned(x)))
-#endif
-
-#if !defined(__CG_QUALIFIER__)
-#define __CG_QUALIFIER__ __device__ __forceinline__
-#endif
-
-#if !defined(__CG_STATIC_QUALIFIER__)
-#define __CG_STATIC_QUALIFIER__ __device__ static __forceinline__
-#endif
-
-#if !defined(WAVEFRONT_SIZE)
-#define WAVEFRONT_SIZE 64
-#endif
-
-namespace cooperative_groups {
-
-namespace internal {
-
-/** \brief Enums representing different cooperative group types
- */
-typedef enum {
-  cg_invalid,
-  cg_multi_grid,
-  cg_grid,
-  cg_workgroup
-} group_type;
-
-/**
- *  Functionalities related to multi-grid cooperative group type
- */
-namespace multi_grid {
-
-__CG_STATIC_QUALIFIER__ uint32_t num_grids() {
-  return (uint32_t)__ockl_multi_grid_num_grids();
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t grid_rank() {
-  return (uint32_t)__ockl_multi_grid_grid_rank();
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t size() {
-  return (uint32_t)__ockl_multi_grid_size();
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
-  return (uint32_t)__ockl_multi_grid_thread_rank();
-}
-
-__CG_STATIC_QUALIFIER__ bool is_valid() {
-  return (bool)__ockl_multi_grid_is_valid();
-}
-
-__CG_STATIC_QUALIFIER__ void sync() {
-  __ockl_multi_grid_sync();
-}
-
-} // namespace multi_grid
-
-/**
- *  Functionalities related to grid cooperative group type
- */
-namespace grid {
-
-__CG_STATIC_QUALIFIER__ uint32_t size() {
-  return (uint32_t)((hipBlockDim_z * hipGridDim_z) *
-                    (hipBlockDim_y * hipGridDim_y) *
-                    (hipBlockDim_x * hipGridDim_x));
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
-  // Compute global id of the workgroup to which the current thread belongs to
-  uint32_t blkIdx =
-           (uint32_t)((hipBlockIdx_z * hipGridDim_y * hipGridDim_x) +
-                      (hipBlockIdx_y * hipGridDim_x) +
-                      (hipBlockIdx_x));
-
-  // Compute total number of threads being passed to reach current workgroup
-  // within grid
-  uint32_t num_threads_till_current_workgroup =
-           (uint32_t)(blkIdx * (hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
-
-  // Compute thread local rank within current workgroup
-  uint32_t local_thread_rank =
-           (uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
-                      (hipThreadIdx_y * hipBlockDim_x) +
-                      (hipThreadIdx_x));
-
-  return (num_threads_till_current_workgroup + local_thread_rank);
-}
-
-__CG_STATIC_QUALIFIER__ bool is_valid() {
-  return (bool)__ockl_grid_is_valid();
-}
-
-__CG_STATIC_QUALIFIER__ void sync() {
-  __ockl_grid_sync();
-}
-
-} // namespace grid
-
-/**
- *  Functionalities related to `workgroup` (thread_block in CUDA terminology)
- *  cooperative group type
- */
-namespace workgroup {
-
-__CG_STATIC_QUALIFIER__ dim3 group_index() {
-  return (dim3((uint32_t)hipBlockIdx_x, (uint32_t)hipBlockIdx_y,
-               (uint32_t)hipBlockIdx_z));
-}
-
-__CG_STATIC_QUALIFIER__ dim3 thread_index() {
-  return (dim3((uint32_t)hipThreadIdx_x, (uint32_t)hipThreadIdx_y,
-               (uint32_t)hipThreadIdx_z));
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t size() {
-  return((uint32_t)(hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
-}
-
-__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- return ((uint32_t)((hipThreadIdx_z * hipBlockDim_y * hipBlockDim_x) +
-                    (hipThreadIdx_y * hipBlockDim_x) +
-                    (hipThreadIdx_x)));
-}
-
-__CG_STATIC_QUALIFIER__ bool is_valid() {
-   //TODO(mahesha) any functionality need to be added here? I believe not
-  return true;
-}
-
-__CG_STATIC_QUALIFIER__ void sync() {
-  __syncthreads();
-}
-
-} // namespace workgroup
-
-} // namespace internal
-
-} // namespace cooperative_groups
-
-#endif // __cplusplus
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_fp16.h b/third_party/rocm/include/hip/hcc_detail/hip_fp16.h
deleted file mode 100644
index af004a8..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_fp16.h
+++ /dev/null
@@ -1,1658 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
-
-#include <hip/hcc_detail/hip_common.h>
-
-#include "hip/hcc_detail/host_defines.h"
-#include <assert.h>
-#if defined(__cplusplus)
-    #include <algorithm>
-    #include <type_traits>
-    #include <utility>
-#endif
-
-#if __HCC_OR_HIP_CLANG__
-    typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
-
-    struct __half_raw {
-        union {
-            static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
-
-            _Float16 data;
-            unsigned short x;
-        };
-    };
-
-    struct __half2_raw {
-        union {
-            static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
-
-            _Float16_2 data;
-            struct {
-                unsigned short x;
-                unsigned short y;
-            };
-        };
-    };
-
-    #if defined(__cplusplus)
-        #include "hip_fp16_math_fwd.h"
-        #include "hip_vector_types.h"
-        #include "host_defines.h"
-
-        namespace std
-        {
-            template<> struct is_floating_point<_Float16> : std::true_type {};
-        }
-
-        template<bool cond, typename T = void>
-        using Enable_if_t = typename std::enable_if<cond, T>::type;
-
-        // BEGIN STRUCT __HALF
-        struct __half {
-        protected:
-            union {
-                static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
-
-                _Float16 data;
-                unsigned short __x;
-            };
-        public:
-            // CREATORS
-            __host__ __device__
-            __half() = default;
-            __host__ __device__
-            __half(const __half_raw& x) : data{x.data} {}
-            #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-                __host__ __device__
-                __half(decltype(data) x) : data{x} {}
-                template<
-                    typename T,
-                    Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
-                __host__ __device__
-                __half(T x) : data{static_cast<_Float16>(x)} {}
-            #endif
-            __host__ __device__
-            __half(const __half&) = default;
-            __host__ __device__
-            __half(__half&&) = default;
-            __host__ __device__
-            ~__half() = default;
-
-            // CREATORS - DEVICE ONLY
-            #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-                template<
-                    typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
-                __host__ __device__
-                __half(T x) : data{static_cast<_Float16>(x)} {}
-            #endif
-
-            // MANIPULATORS
-            __host__ __device__
-            __half& operator=(const __half&) = default;
-            __host__ __device__
-            __half& operator=(__half&&) = default;
-            __host__ __device__
-            __half& operator=(const __half_raw& x)
-            {
-                data = x.data;
-                return *this;
-            }
-            __host__ __device__
-            volatile __half& operator=(const __half_raw& x) volatile
-            {
-                data = x.data;
-                return *this;
-            }
-            volatile __half& operator=(const volatile __half_raw& x) volatile
-            {
-                data = x.data;
-                return *this;
-            }
-            __half& operator=(__half_raw&& x)
-            {
-                data = x.data;
-                return *this;
-            }
-            volatile __half& operator=(__half_raw&& x) volatile
-            {
-                data = x.data;
-                return *this;
-            }
-            volatile __half& operator=(volatile __half_raw&& x) volatile
-            {
-                data = x.data;
-                return *this;
-            }
-            #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-                template<
-                    typename T,
-                    Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
-                __host__ __device__
-                __half& operator=(T x)
-                {
-                    data = static_cast<_Float16>(x);
-                    return *this;
-                }
-            #endif
-
-            // MANIPULATORS - DEVICE ONLY
-            #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-                template<
-                    typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
-                __device__
-                __half& operator=(T x)
-                {
-                    data = static_cast<_Float16>(x);
-                    return *this;
-                }
-            #endif
-
-            #if !defined(__HIP_NO_HALF_OPERATORS__)
-                __device__
-                __half& operator+=(const __half& x)
-                {
-                    data += x.data;
-                    return *this;
-                }
-                __device__
-                __half& operator-=(const __half& x)
-                {
-                    data -= x.data;
-                    return *this;
-                }
-                __device__
-                __half& operator*=(const __half& x)
-                {
-                    data *= x.data;
-                    return *this;
-                }
-                __device__
-                __half& operator/=(const __half& x)
-                {
-                    data /= x.data;
-                    return *this;
-                }
-                __device__
-                __half& operator++() { ++data; return *this; }
-                __device__
-                __half operator++(int)
-                {
-                    __half tmp{*this};
-                    ++*this;
-                    return tmp;
-                }
-                __device__
-                __half& operator--() { --data; return *this; }
-                __device__
-                __half operator--(int)
-                {
-                    __half tmp{*this};
-                    --*this;
-                    return tmp;
-                }
-            #endif
-
-            // ACCESSORS
-            #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-                template<
-                    typename T,
-                    Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
-                __host__ __device__
-                operator T() const { return data; }
-            #endif
-            __host__ __device__
-            operator __half_raw() const { return __half_raw{data}; }
-            __host__ __device__
-            operator __half_raw() const volatile
-            {
-                return __half_raw{data};
-            }
-
-            #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-                template<
-                    typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
-                __host__ __device__
-                operator T() const { return data; }
-            #endif
-
-            #if !defined(__HIP_NO_HALF_OPERATORS__)
-                __device__
-                __half operator+() const { return *this; }
-                __device__
-                __half operator-() const
-                {
-                    __half tmp{*this};
-                    tmp.data = -tmp.data;
-                    return tmp;
-                }
-            #endif
-
-            // FRIENDS
-            #if !defined(__HIP_NO_HALF_OPERATORS__)
-                friend
-                inline
-                __device__
-                __half operator+(const __half& x, const __half& y)
-                {
-                    return __half{x} += y;
-                }
-                friend
-                inline
-                __device__
-                __half operator-(const __half& x, const __half& y)
-                {
-                    return __half{x} -= y;
-                }
-                friend
-                inline
-                __device__
-                __half operator*(const __half& x, const __half& y)
-                {
-                    return __half{x} *= y;
-                }
-                friend
-                inline
-                __device__
-                __half operator/(const __half& x, const __half& y)
-                {
-                    return __half{x} /= y;
-                }
-                friend
-                inline
-                __device__
-                bool operator==(const __half& x, const __half& y)
-                {
-                    return x.data == y.data;
-                }
-                friend
-                inline
-                __device__
-                bool operator!=(const __half& x, const __half& y)
-                {
-                    return !(x == y);
-                }
-                friend
-                inline
-                __device__
-                bool operator<(const __half& x, const __half& y)
-                {
-                    return x.data < y.data;
-                }
-                friend
-                inline
-                __device__
-                bool operator>(const __half& x, const __half& y)
-                {
-                    return y.data < x.data;
-                }
-                friend
-                inline
-                __device__
-                bool operator<=(const __half& x, const __half& y)
-                {
-                    return !(y < x);
-                }
-                friend
-                inline
-                __device__
-                bool operator>=(const __half& x, const __half& y)
-                {
-                    return !(x < y);
-                }
-            #endif // !defined(__HIP_NO_HALF_OPERATORS__)
-        };
-        // END STRUCT __HALF
-
-        // BEGIN STRUCT __HALF2
-        struct __half2 {
-        protected:
-            union {
-                static_assert(
-                    sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
-
-                _Float16_2 data;
-                struct {
-                    unsigned short x;
-                    unsigned short y;
-                };
-            };
-        public:
-            // CREATORS
-            __host__ __device__
-            __half2() = default;
-            __host__ __device__
-            __half2(const __half2_raw& x) : data{x.data} {}
-            __host__ __device__
-            __half2(decltype(data) x) : data{x} {}
-            __host__ __device__
-            __half2(const __half& x, const __half& y)
-                :
-                data{
-                    static_cast<__half_raw>(x).data,
-                    static_cast<__half_raw>(y).data}
-            {}
-            __host__ __device__
-            __half2(const __half2&) = default;
-            __host__ __device__
-            __half2(__half2&&) = default;
-            __host__ __device__
-            ~__half2() = default;
-
-            // MANIPULATORS
-            __host__ __device__
-            __half2& operator=(const __half2&) = default;
-            __host__ __device__
-            __half2& operator=(__half2&&) = default;
-            __host__ __device__
-            __half2& operator=(const __half2_raw& x)
-            {
-                data = x.data;
-                return *this;
-            }
- 
-            // MANIPULATORS - DEVICE ONLY
-            #if !defined(__HIP_NO_HALF_OPERATORS__)
-                __device__
-                __half2& operator+=(const __half2& x)
-                {
-                    data += x.data;
-                    return *this;
-                }
-                __device__
-                __half2& operator-=(const __half2& x)
-                {
-                    data -= x.data;
-                    return *this;
-                }
-                __device__
-                __half2& operator*=(const __half2& x)
-                {
-                    data *= x.data;
-                    return *this;
-                }
-                __device__
-                __half2& operator/=(const __half2& x)
-                {
-                    data /= x.data;
-                    return *this;
-                }
-                __device__
-                __half2& operator++() { return *this += _Float16_2{1, 1}; }
-                __device__
-                __half2 operator++(int)
-                {
-                    __half2 tmp{*this};
-                    ++*this;
-                    return tmp;
-                }
-                __device__
-                __half2& operator--() { return *this -= _Float16_2{1, 1}; }
-                __device__
-                __half2 operator--(int)
-                {
-                    __half2 tmp{*this};
-                    --*this;
-                    return tmp;
-                }
-            #endif
-
-            // ACCESSORS
-            __host__ __device__
-            operator decltype(data)() const { return data; }
-            __host__ __device__
-            operator __half2_raw() const { return __half2_raw{data}; }
-
-            // ACCESSORS - DEVICE ONLY
-            #if !defined(__HIP_NO_HALF_OPERATORS__)
-                __device__
-                __half2 operator+() const { return *this; }
-                __device__
-                __half2 operator-() const
-                {
-                    __half2 tmp{*this};
-                    tmp.data = -tmp.data;
-                    return tmp;
-                }
-            #endif
-
-            // FRIENDS
-            #if !defined(__HIP_NO_HALF_OPERATORS__)
-                friend
-                inline
-                __device__
-                __half2 operator+(const __half2& x, const __half2& y)
-                {
-                    return __half2{x} += y;
-                }
-                friend
-                inline
-                __device__
-                __half2 operator-(const __half2& x, const __half2& y)
-                {
-                    return __half2{x} -= y;
-                }
-                friend
-                inline
-                __device__
-                __half2 operator*(const __half2& x, const __half2& y)
-                {
-                    return __half2{x} *= y;
-                }
-                friend
-                inline
-                __device__
-                __half2 operator/(const __half2& x, const __half2& y)
-                {
-                    return __half2{x} /= y;
-                }
-                friend
-                inline
-                __device__
-                bool operator==(const __half2& x, const __half2& y)
-                {
-                    auto r = x.data == y.data;
-                    return r.x != 0 && r.y != 0;
-                }
-                friend
-                inline
-                __device__
-                bool operator!=(const __half2& x, const __half2& y)
-                {
-                    return !(x == y);
-                }
-                friend
-                inline
-                __device__
-                bool operator<(const __half2& x, const __half2& y)
-                {
-                    auto r = x.data < y.data;
-                    return r.x != 0 && r.y != 0;
-                }
-                friend
-                inline
-                __device__
-                bool operator>(const __half2& x, const __half2& y)
-                {
-                    return y < x;
-                }
-                friend
-                inline
-                __device__
-                bool operator<=(const __half2& x, const __half2& y)
-                {
-                    return !(y < x);
-                }
-                friend
-                inline
-                __device__
-                bool operator>=(const __half2& x, const __half2& y)
-                {
-                    return !(x < y);
-                }
-            #endif // !defined(__HIP_NO_HALF_OPERATORS__)
-        };
-        // END STRUCT __HALF2
-
-        namespace
-        {
-            inline
-            __host__ __device__
-            __half2 make_half2(__half x, __half y)
-            {
-                return __half2{x, y};
-            }
-
-            inline
-            __host__ __device__
-            __half __low2half(__half2 x)
-            {
-                return __half{__half_raw{static_cast<__half2_raw>(x).data.x}};
-            }
-
-            inline
-            __host__ __device__
-            __half __high2half(__half2 x)
-            {
-                return __half{__half_raw{static_cast<__half2_raw>(x).data.y}};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __half2half2(__half x)
-            {
-                return __half2{x, x};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __halves2half2(__half x, __half y)
-            {
-                return __half2{x, y};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __low2half2(__half2 x)
-            {
-                return __half2{
-                    _Float16_2{
-                        static_cast<__half2_raw>(x).data.x,
-                        static_cast<__half2_raw>(x).data.x}};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __high2half2(__half2 x)
-            {
-                return __half2_raw{
-                    _Float16_2{
-                        static_cast<__half2_raw>(x).data.y,
-                        static_cast<__half2_raw>(x).data.y}};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __lows2half2(__half2 x, __half2 y)
-            {
-                return __half2_raw{
-                    _Float16_2{
-                        static_cast<__half2_raw>(x).data.x,
-                        static_cast<__half2_raw>(y).data.x}};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __highs2half2(__half2 x, __half2 y)
-            {
-                return __half2_raw{
-                    _Float16_2{
-                        static_cast<__half2_raw>(x).data.y,
-                        static_cast<__half2_raw>(y).data.y}};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __lowhigh2highlow(__half2 x)
-            {
-                return __half2_raw{
-                    _Float16_2{
-                        static_cast<__half2_raw>(x).data.y,
-                        static_cast<__half2_raw>(x).data.x}};
-            }
-
-            // Bitcasts
-            inline
-            __device__
-            short __half_as_short(__half x)
-            {
-                return static_cast<__half_raw>(x).x;
-            }
-
-            inline
-            __device__
-            unsigned short __half_as_ushort(__half x)
-            {
-                return static_cast<__half_raw>(x).x;
-            }
-
-            inline
-            __device__
-            __half __short_as_half(short x)
-            {
-                __half_raw r; r.x = x;
-                return r;
-            }
-
-            inline
-            __device__
-            __half __ushort_as_half(unsigned short x)
-            {
-                __half_raw r; r.x = x;
-                return r;
-            }
-
-            // TODO: rounding behaviour is not correct.
-            // float -> half | half2
-            inline
-            __device__ __host__
-            __half __float2half(float x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__ __host__
-            __half __float2half_rn(float x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__ __host__
-            __half __float2half_rz(float x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__ __host__
-            __half __float2half_rd(float x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__ __host__
-            __half __float2half_ru(float x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__ __host__
-            __half2 __float2half2_rn(float x)
-            {
-                return __half2_raw{
-                    _Float16_2{
-                        static_cast<_Float16>(x), static_cast<_Float16>(x)}};
-            }
-            inline
-            __device__ __host__
-            __half2 __floats2half2_rn(float x, float y)
-            {
-                return __half2_raw{_Float16_2{
-                    static_cast<_Float16>(x), static_cast<_Float16>(y)}};
-            }
-            inline
-            __device__ __host__
-            __half2 __float22half2_rn(float2 x)
-            {
-                return __floats2half2_rn(x.x, x.y);
-            }
-
-            // half | half2 -> float
-            inline
-            __device__ __host__
-            float __half2float(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__ __host__
-            float __low2float(__half2 x)
-            {
-                return static_cast<__half2_raw>(x).data.x;
-            }
-            inline
-            __device__ __host__
-            float __high2float(__half2 x)
-            {
-                return static_cast<__half2_raw>(x).data.y;
-            }
-            inline
-            __device__ __host__
-            float2 __half22float2(__half2 x)
-            {
-                return make_float2(
-                    static_cast<__half2_raw>(x).data.x,
-                    static_cast<__half2_raw>(x).data.y);
-            }
-
-            // half -> int
-            inline
-            __device__
-            int __half2int_rn(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            int __half2int_rz(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            int __half2int_rd(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            int __half2int_ru(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-
-            // int -> half
-            inline
-            __device__
-            __half __int2half_rn(int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __int2half_rz(int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __int2half_rd(int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __int2half_ru(int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-
-            // half -> short
-            inline
-            __device__
-            short __half2short_rn(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            short __half2short_rz(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            short __half2short_rd(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            short __half2short_ru(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-
-            // short -> half
-            inline
-            __device__
-            __half __short2half_rn(short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __short2half_rz(short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __short2half_rd(short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __short2half_ru(short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-
-            // half -> long long
-            inline
-            __device__
-            long long __half2ll_rn(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            long long __half2ll_rz(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            long long __half2ll_rd(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            long long __half2ll_ru(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-
-            // long long -> half
-            inline
-            __device__
-            __half __ll2half_rn(long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ll2half_rz(long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ll2half_rd(long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ll2half_ru(long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-
-            // half -> unsigned int
-            inline
-            __device__
-            unsigned int __half2uint_rn(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned int __half2uint_rz(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned int __half2uint_rd(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned int __half2uint_ru(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-
-            // unsigned int -> half
-            inline
-            __device__
-            __half __uint2half_rn(unsigned int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __uint2half_rz(unsigned int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __uint2half_rd(unsigned int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __uint2half_ru(unsigned int x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-
-            // half -> unsigned short
-            inline
-            __device__
-            unsigned short __half2ushort_rn(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned short __half2ushort_rz(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned short __half2ushort_rd(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned short __half2ushort_ru(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-
-            // unsigned short -> half
-            inline
-            __device__
-            __half __ushort2half_rn(unsigned short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ushort2half_rz(unsigned short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ushort2half_rd(unsigned short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ushort2half_ru(unsigned short x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-
-            // half -> unsigned long long
-            inline
-            __device__
-            unsigned long long __half2ull_rn(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned long long __half2ull_rz(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned long long __half2ull_rd(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-            inline
-            __device__
-            unsigned long long __half2ull_ru(__half x)
-            {
-                return static_cast<__half_raw>(x).data;
-            }
-
-            // unsigned long long -> half
-            inline
-            __device__
-            __half __ull2half_rn(unsigned long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ull2half_rz(unsigned long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ull2half_rd(unsigned long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-            inline
-            __device__
-            __half __ull2half_ru(unsigned long long x)
-            {
-                return __half_raw{static_cast<_Float16>(x)};
-            }
-
-            // Load primitives
-            inline
-            __device__
-            __half __ldg(const __half* ptr) { return *ptr; }
-            inline
-            __device__
-            __half __ldcg(const __half* ptr) { return *ptr; }
-            inline
-            __device__
-            __half __ldca(const __half* ptr) { return *ptr; }
-            inline
-            __device__
-            __half __ldcs(const __half* ptr) { return *ptr; }
-
-            inline
-            __host__ __device__
-            __half2 __ldg(const __half2* ptr) { return *ptr; }
-            inline
-            __host__ __device__
-            __half2 __ldcg(const __half2* ptr) { return *ptr; }
-            inline
-            __host__ __device__
-            __half2 __ldca(const __half2* ptr) { return *ptr; }
-            inline
-            __host__ __device__
-            __half2 __ldcs(const __half2* ptr) { return *ptr; }
-
-            // Relations
-            inline
-            __device__
-            bool __heq(__half x, __half y)
-            {
-                return static_cast<__half_raw>(x).data ==
-                    static_cast<__half_raw>(y).data;
-            }
-            inline
-            __device__
-            bool __hne(__half x, __half y)
-            {
-                return static_cast<__half_raw>(x).data !=
-                    static_cast<__half_raw>(y).data;
-            }
-            inline
-            __device__
-            bool __hle(__half x, __half y)
-            {
-                return static_cast<__half_raw>(x).data <=
-                    static_cast<__half_raw>(y).data;
-            }
-            inline
-            __device__
-            bool __hge(__half x, __half y)
-            {
-                return static_cast<__half_raw>(x).data >=
-                    static_cast<__half_raw>(y).data;
-            }
-            inline
-            __device__
-            bool __hlt(__half x, __half y)
-            {
-                return static_cast<__half_raw>(x).data <
-                    static_cast<__half_raw>(y).data;
-            }
-            inline
-            __device__
-            bool __hgt(__half x, __half y)
-            {
-                return static_cast<__half_raw>(x).data >
-                    static_cast<__half_raw>(y).data;
-            }
-            inline
-            __device__
-            bool __hequ(__half x, __half y) { return __heq(x, y); }
-            inline
-            __device__
-            bool __hneu(__half x, __half y) { return __hne(x, y); }
-            inline
-            __device__
-            bool __hleu(__half x, __half y) { return __hle(x, y); }
-            inline
-            __device__
-            bool __hgeu(__half x, __half y) { return __hge(x, y); }
-            inline
-            __device__
-            bool __hltu(__half x, __half y) { return __hlt(x, y); }
-            inline
-            __device__
-            bool __hgtu(__half x, __half y) { return __hgt(x, y); }
-
-            inline
-            __host__ __device__
-            __half2 __heq2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(x).data ==
-                    static_cast<__half2_raw>(y).data;
-                return __builtin_convertvector(-r, _Float16_2);
-            }
-            inline
-            __host__ __device__
-            __half2 __hne2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(x).data !=
-                    static_cast<__half2_raw>(y).data;
-                return __builtin_convertvector(-r, _Float16_2);
-            }
-            inline
-            __host__ __device__
-            __half2 __hle2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(x).data <=
-                    static_cast<__half2_raw>(y).data;
-                return __builtin_convertvector(-r, _Float16_2);
-            }
-            inline
-            __host__ __device__
-            __half2 __hge2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(x).data >=
-                    static_cast<__half2_raw>(y).data;
-                return __builtin_convertvector(-r, _Float16_2);
-            }
-            inline
-            __host__ __device__
-            __half2 __hlt2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(x).data <
-                    static_cast<__half2_raw>(y).data;
-                return __builtin_convertvector(-r, _Float16_2);
-            }
-            inline
-            __host__ __device__
-            __half2 __hgt2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(x).data >
-                    static_cast<__half2_raw>(y).data;
-                return __builtin_convertvector(-r, _Float16_2);
-            }
-            inline
-            __host__ __device__
-            __half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); }
-            inline
-            __host__ __device__
-            __half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); }
-            inline
-            __host__ __device__
-            __half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); }
-            inline
-            __host__  __device__
-            __half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); }
-            inline
-            __host__ __device__
-            __half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); }
-            inline
-            __host__ __device__
-            __half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); }
-
-            inline
-            __host__ __device__
-            bool __hbeq2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__heq2(x, y));
-                return r.data.x != 0 && r.data.y != 0;
-            }
-            inline
-            __host__ __device__
-            bool __hbne2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hne2(x, y));
-                return r.data.x != 0 && r.data.y != 0;
-            }
-            inline
-            __host__ __device__
-            bool __hble2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hle2(x, y));
-                return r.data.x != 0 && r.data.y != 0;
-            }
-            inline
-            __host__ __device__
-            bool __hbge2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hge2(x, y));
-                return r.data.x != 0 && r.data.y != 0;
-            }
-            inline
-            __host__ __device__
-            bool __hblt2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hlt2(x, y));
-                return r.data.x != 0 && r.data.y != 0;
-            }
-            inline
-            __host__ __device__
-            bool __hbgt2(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hgt2(x, y));
-                return r.data.x != 0 && r.data.y != 0;
-            }
-            inline
-            __host__ __device__
-            bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); }
-            inline
-            __host__ __device__
-            bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); }
-            inline
-            __host__ __device__
-            bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); }
-            inline
-            __host__ __device__
-            bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); }
-            inline
-            __host__ __device__
-            bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); }
-            inline
-            __host__ __device__
-            bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); }
-
-            // Arithmetic
-            inline
-            __device__
-            __half __clamp_01(__half x)
-            {
-                auto r = static_cast<__half_raw>(x);
-
-                if (__hlt(x, __half_raw{0})) return __half_raw{0};
-                if (__hlt(__half_raw{1}, x)) return __half_raw{1};
-                return r;
-            }
-
-            inline
-            __device__
-            __half __hadd(__half x, __half y)
-            {
-                return __half_raw{
-                    static_cast<__half_raw>(x).data +
-                    static_cast<__half_raw>(y).data};
-            }
-	    inline
-	    __device__
-	    __half __habs(__half x)
-	    {
-	        return __half_raw{
-		    __ocml_fabs_f16(static_cast<__half_raw>(x).data)};
-	    }
-            inline
-            __device__
-            __half __hsub(__half x, __half y)
-            {
-                return __half_raw{
-                    static_cast<__half_raw>(x).data -
-                    static_cast<__half_raw>(y).data};
-            }
-            inline
-            __device__
-            __half __hmul(__half x, __half y)
-            {
-                return __half_raw{
-                    static_cast<__half_raw>(x).data *
-                    static_cast<__half_raw>(y).data};
-            }
-            inline
-            __device__
-            __half __hadd_sat(__half x, __half y)
-            {
-                return __clamp_01(__hadd(x, y));
-            }
-            inline
-            __device__
-            __half __hsub_sat(__half x, __half y)
-            {
-                return __clamp_01(__hsub(x, y));
-            }
-            inline
-            __device__
-            __half __hmul_sat(__half x, __half y)
-            {
-                return __clamp_01(__hmul(x, y));
-            }
-            inline
-            __device__
-            __half __hfma(__half x, __half y, __half z)
-            {
-                return __half_raw{__ocml_fma_f16(
-                    static_cast<__half_raw>(x).data,
-                    static_cast<__half_raw>(y).data,
-                    static_cast<__half_raw>(z).data)};
-            }
-            inline
-            __device__
-            __half __hfma_sat(__half x, __half y, __half z)
-            {
-                return __clamp_01(__hfma(x, y, z));
-            }
-            inline
-            __device__
-            __half __hdiv(__half x, __half y)
-            {
-                return __half_raw{
-                    static_cast<__half_raw>(x).data /
-                    static_cast<__half_raw>(y).data};
-            }
-
-            inline
-            __host__ __device__
-            __half2 __hadd2(__half2 x, __half2 y)
-            {
-                return __half2_raw{
-                    static_cast<__half2_raw>(x).data +
-                    static_cast<__half2_raw>(y).data};
-            }
-	    inline
-	    __host__ __device__
-	    __half2 __habs2(__half2 x)
-	    {
-	        return __half2_raw{
-		    __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)};
-	    }
-            inline
-            __host__ __device__
-            __half2 __hsub2(__half2 x, __half2 y)
-            {
-                return __half2_raw{
-                    static_cast<__half2_raw>(x).data -
-                    static_cast<__half2_raw>(y).data};
-            }
-            inline
-            __host__ __device__
-            __half2 __hmul2(__half2 x, __half2 y)
-            {
-                return __half2_raw{
-                    static_cast<__half2_raw>(x).data *
-                    static_cast<__half2_raw>(y).data};
-            }
-            inline
-            __host__ __device__
-            __half2 __hadd2_sat(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hadd2(x, y));
-                return __half2{
-                    __clamp_01(__half_raw{r.data.x}),
-                    __clamp_01(__half_raw{r.data.y})};
-            }
-            inline
-            __host__ __device__
-            __half2 __hsub2_sat(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hsub2(x, y));
-                return __half2{
-                    __clamp_01(__half_raw{r.data.x}),
-                    __clamp_01(__half_raw{r.data.y})};
-            }
-            inline
-            __host__ __device__
-            __half2 __hmul2_sat(__half2 x, __half2 y)
-            {
-                auto r = static_cast<__half2_raw>(__hmul2(x, y));
-                return __half2{
-                    __clamp_01(__half_raw{r.data.x}),
-                    __clamp_01(__half_raw{r.data.y})};
-            }
-            inline
-            __host__ __device__
-            __half2 __hfma2(__half2 x, __half2 y, __half2 z)
-            {
-                return __half2_raw{__ocml_fma_2f16(x, y, z)};
-            }
-            inline
-            __host__ __device__
-            __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
-            {
-                auto r = static_cast<__half2_raw>(__hfma2(x, y, z));
-                return __half2{
-                    __clamp_01(__half_raw{r.data.x}),
-                    __clamp_01(__half_raw{r.data.y})};
-            }
-            inline
-            __host__ __device__
-            __half2 __h2div(__half2 x, __half2 y)
-            {
-                return __half2_raw{
-                    static_cast<__half2_raw>(x).data /
-                    static_cast<__half2_raw>(y).data};
-            }
-
-            // Math functions
-            #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
-            inline
-            __device__
-            float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) {
-                return __ockl_fdot2(static_cast<__half2_raw>(a).data,
-                                    static_cast<__half2_raw>(b).data,
-                                    c, saturate);
-            }
-            #endif
-            inline
-            __device__
-            __half htrunc(__half x)
-            {
-                return __half_raw{
-                    __ocml_trunc_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hceil(__half x)
-            {
-                return __half_raw{
-                    __ocml_ceil_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hfloor(__half x)
-            {
-                return __half_raw{
-                   __ocml_floor_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hrint(__half x)
-            {
-                return __half_raw{
-                    __ocml_rint_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hsin(__half x)
-            {
-                return __half_raw{
-                    __ocml_sin_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hcos(__half x)
-            {
-                return __half_raw{
-                    __ocml_cos_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hexp(__half x)
-            {
-                return __half_raw{
-                    __ocml_exp_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hexp2(__half x)
-            {
-                return __half_raw{
-                    __ocml_exp2_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hexp10(__half x)
-            {
-                return __half_raw{
-                    __ocml_exp10_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hlog2(__half x)
-            {
-                return __half_raw{
-                    __ocml_log2_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hlog(__half x)
-            {
-                return __half_raw{
-                    __ocml_log_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hlog10(__half x)
-            {
-                return __half_raw{
-                    __ocml_log10_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hrcp(__half x)
-            {
-                return __half_raw{
-                    __llvm_amdgcn_rcp_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hrsqrt(__half x)
-            {
-                return __half_raw{
-                    __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            __half hsqrt(__half x)
-            {
-                return __half_raw{
-                    __ocml_sqrt_f16(static_cast<__half_raw>(x).data)};
-            }
-            inline
-            __device__
-            bool __hisinf(__half x)
-            {
-                return __ocml_isinf_f16(static_cast<__half_raw>(x).data);
-            }
-            inline
-            __device__
-            bool __hisnan(__half x)
-            {
-                return __ocml_isnan_f16(static_cast<__half_raw>(x).data);
-            }
-            inline
-            __device__
-            __half __hneg(__half x)
-            {
-                return __half_raw{-static_cast<__half_raw>(x).data};
-            }
-
-            inline
-            __host__ __device__
-            __half2 h2trunc(__half2 x)
-            {
-                return __half2_raw{__ocml_trunc_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2ceil(__half2 x)
-            {
-                return __half2_raw{__ocml_ceil_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2floor(__half2 x)
-            {
-                return __half2_raw{__ocml_floor_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2rint(__half2 x)
-            {
-                return __half2_raw{__ocml_rint_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2sin(__half2 x)
-            {
-                return __half2_raw{__ocml_sin_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2cos(__half2 x)
-            {
-                return __half2_raw{__ocml_cos_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2exp(__half2 x)
-            {
-                return __half2_raw{__ocml_exp_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2exp2(__half2 x)
-            {
-                return __half2_raw{__ocml_exp2_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2exp10(__half2 x)
-            {
-                return __half2_raw{__ocml_exp10_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2log2(__half2 x)
-            {
-                return __half2_raw{__ocml_log2_2f16(x)};
-            }
-            inline
-            __host__ __device__
-            __half2 h2log(__half2 x) { return __ocml_log_2f16(x); }
-            inline
-            __host__ __device__
-            __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); }
-            inline
-            __host__ __device__
-            __half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); }
-            inline
-            __host__ __device__
-            __half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); }
-            inline
-            __host__ __device__
-            __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); }
-            inline
-            __host__ __device__
-            __half2 __hisinf2(__half2 x)
-            {
-                auto r = __ocml_isinf_2f16(x);
-                return __half2_raw{_Float16_2{
-                    static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
-            }
-            inline
-            __host__  __device__
-            __half2 __hisnan2(__half2 x)
-            {
-                auto r = __ocml_isnan_2f16(x);
-                return __half2_raw{_Float16_2{
-                    static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
-            }
-            inline
-            __host__ __device__
-            __half2 __hneg2(__half2 x)
-            {
-                return __half2_raw{-static_cast<__half2_raw>(x).data};
-            }
-        } // Anonymous namespace.
-
-        #if !defined(HIP_NO_HALF)
-            using half = __half;
-            using half2 = __half2;
-        #endif
-    #endif // defined(__cplusplus)
-#elif defined(__GNUC__)
-    #include "hip_fp16_gcc.h"
-#endif // !defined(__clang__) && defined(__GNUC__)
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_fp16_gcc.h b/third_party/rocm/include/hip/hcc_detail/hip_fp16_gcc.h
deleted file mode 100644
index 480fd81..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_fp16_gcc.h
+++ /dev/null
@@ -1,254 +0,0 @@
-#pragma once
-
-#if defined(__cplusplus)
-    #include <cstring>
-#endif
-
-struct __half_raw {
-    unsigned short x;
-};
-
-struct __half2_raw {
-    unsigned short x;
-    unsigned short y;
-};
-
-#if defined(__cplusplus)
-    struct __half;
-
-    __half __float2half(float);
-    float __half2float(__half);
-
-    // BEGIN STRUCT __HALF
-    struct __half {
-    protected:
-        unsigned short __x;
-    public:
-        // CREATORS
-        __half() = default;
-        __half(const __half_raw& x) : __x{x.x} {}
-        #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-            __half(float x) : __x{__float2half(x).__x} {}
-            __half(double x) : __x{__float2half(x).__x} {}
-        #endif
-        __half(const __half&) = default;
-        __half(__half&&) = default;
-        ~__half() = default;
-
-        // MANIPULATORS
-        __half& operator=(const __half&) = default;
-        __half& operator=(__half&&) = default;
-        __half& operator=(const __half_raw& x) { __x = x.x; return *this; }
-        #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-            __half& operator=(float x)
-            {
-                __x = __float2half(x).__x;
-                return *this;
-            }
-            __half& operator=(double x)
-            {
-                return *this = static_cast<float>(x);
-            }
-        #endif
-
-        // ACCESSORS
-        operator float() const { return __half2float(*this); }
-        operator __half_raw() const { return __half_raw{__x}; }
-    };
-    // END STRUCT __HALF
-
-    // BEGIN STRUCT __HALF2
-    struct __half2 {
-    protected:
-        __half x;
-        __half y;
-    public:
-        // CREATORS
-        __half2() = default;
-        __half2(const __half2_raw& ix)
-            :
-            x{reinterpret_cast<const __half&>(ix.x)},
-            y{reinterpret_cast<const __half&>(ix.y)}
-        {}
-        __half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {}
-        __half2(const __half2&) = default;
-        __half2(__half2&&) = default;
-        ~__half2() = default;
-
-        // MANIPULATORS
-        __half2& operator=(const __half2&) = default;
-        __half2& operator=(__half2&&) = default;
-        __half2& operator=(const __half2_raw& ix)
-        {
-            x = reinterpret_cast<const __half_raw&>(ix.x);
-            y = reinterpret_cast<const __half_raw&>(ix.y);
-            return *this;
-        }
-
-        // ACCESSORS
-        operator __half2_raw() const
-        {
-            return __half2_raw{
-                reinterpret_cast<const unsigned short&>(x),
-                reinterpret_cast<const unsigned short&>(y)};
-        }
-    };
-    // END STRUCT __HALF2
-
-    inline
-    unsigned short __internal_float2half(
-        float flt, unsigned int& sgn, unsigned int& rem)
-    {
-        unsigned int x{};
-        std::memcpy(&x, &flt, sizeof(flt));
-
-        unsigned int u = (x & 0x7fffffffU);
-        sgn = ((x >> 16) & 0x8000U);
-
-        // NaN/+Inf/-Inf
-        if (u >= 0x7f800000U) {
-            rem = 0;
-            return static_cast<unsigned short>(
-                (u == 0x7f800000U) ? (sgn | 0x7c00U) : 0x7fffU);
-        }
-        // Overflows
-        if (u > 0x477fefffU) {
-            rem = 0x80000000U;
-            return static_cast<unsigned short>(sgn | 0x7bffU);
-        }
-        // Normal numbers
-        if (u >= 0x38800000U) {
-            rem = u << 19;
-            u -= 0x38000000U;
-            return static_cast<unsigned short>(sgn | (u >> 13));
-        }
-        // +0/-0
-        if (u < 0x33000001U) {
-            rem = u;
-            return static_cast<unsigned short>(sgn);
-        }
-        // Denormal numbers
-        unsigned int exponent = u >> 23;
-        unsigned int mantissa = (u & 0x7fffffU);
-        unsigned int shift = 0x7eU - exponent;
-        mantissa |= 0x800000U;
-        rem = mantissa << (32 - shift);
-        return static_cast<unsigned short>(sgn | (mantissa >> shift));
-    }
-
-    inline
-    __half __float2half(float x)
-    {
-        __half_raw r;
-        unsigned int sgn{};
-        unsigned int rem{};
-        r.x = __internal_float2half(x, sgn, rem);
-        if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x;
-
-        return r;
-    }
-
-    inline
-    __half __float2half_rn(float x) { return __float2half(x); }
-
-    inline
-    __half __float2half_rz(float x)
-    {
-        __half_raw r;
-        unsigned int sgn{};
-        unsigned int rem{};
-        r.x = __internal_float2half(x, sgn, rem);
-
-        return r;
-    }
-
-    inline
-    __half __float2half_rd(float x)
-    {
-        __half_raw r;
-        unsigned int sgn{};
-        unsigned int rem{};
-        r.x = __internal_float2half(x, sgn, rem);
-        if (rem && sgn) ++r.x;
-
-        return r;
-    }
-
-    inline
-    __half __float2half_ru(float x)
-    {
-        __half_raw r;
-        unsigned int sgn{};
-        unsigned int rem{};
-        r.x = __internal_float2half(x, sgn, rem);
-        if (rem && !sgn) ++r.x;
-
-        return r;
-    }
-
-    inline
-    __half2 __float2half2_rn(float x)
-    {
-        return __half2{__float2half_rn(x), __float2half_rn(x)};
-    }
-
-    inline
-    __half2 __floats2half2_rn(float x, float y)
-    {
-        return __half2{__float2half_rn(x), __float2half_rn(y)};
-    }
-
-    inline
-    float __internal_half2float(unsigned short x)
-    {
-        unsigned int sign = ((x >> 15) & 1);
-        unsigned int exponent = ((x >> 10) & 0x1f);
-        unsigned int mantissa = ((x & 0x3ff) << 13);
-
-        if (exponent == 0x1fU) { /* NaN or Inf */
-            mantissa = (mantissa ? (sign = 0, 0x7fffffU) : 0);
-            exponent = 0xffU;
-        } else if (!exponent) { /* Denorm or Zero */
-            if (mantissa) {
-                unsigned int msb;
-                exponent = 0x71U;
-                do {
-                    msb = (mantissa & 0x400000U);
-                    mantissa <<= 1; /* normalize */
-                    --exponent;
-                } while (!msb);
-                mantissa &= 0x7fffffU; /* 1.mantissa is implicit */
-            }
-        } else {
-            exponent += 0x70U;
-        }
-        unsigned int u = ((sign << 31) | (exponent << 23) | mantissa);
-        float f;
-        memcpy(&f, &u, sizeof(u));
-
-        return f;
-    }
-
-    inline
-    float __half2float(__half x)
-    {
-        return __internal_half2float(static_cast<__half_raw>(x).x);
-    }
-
-    inline
-    float __low2float(__half2 x)
-    {
-        return __internal_half2float(static_cast<__half2_raw>(x).x);
-    }
-
-    inline
-    float __high2float(__half2 x)
-    {
-        return __internal_half2float(static_cast<__half2_raw>(x).y);
-    }
-
-    #if !defined(HIP_NO_HALF)
-        using half = __half;
-        using half2 = __half2;
-    #endif
-#endif // defined(__cplusplus)
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_fp16_math_fwd.h b/third_party/rocm/include/hip/hcc_detail/hip_fp16_math_fwd.h
deleted file mode 100644
index 53a2c66..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_fp16_math_fwd.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-// /*
-// Half Math Functions
-// */
-
-#include "host_defines.h"
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-extern "C"
-{
-    __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
-    __device__ _Float16 __ocml_cos_f16(_Float16);
-    __device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
-    __device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
-    __device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
-    __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
-    __device__ __attribute__((const))
-    _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16);
-    __device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16);
-    __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16);
-    __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16);
-    __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
-    __device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
-    __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
-    __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int);
-    __device__ __attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
-    __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
-    __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
-    __device__ _Float16 __ocml_sin_f16(_Float16);
-    __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
-    __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
-
-    typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
-    typedef short __2i16 __attribute__((ext_vector_type(2)));
-
-    #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
-    __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s);
-    #endif
-
-    __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
-    __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16);
-    __device__ __2f16 __ocml_cos_2f16(__2f16);
-    __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
-    __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
-    __device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
-    __device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
-    __device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
-    __device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
-    __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
-    __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
-    __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
-    __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
-    __device__ inline
-    __2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL.
-    {
-        return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)};
-    }
-    __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
-    __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
-    __device__ __2f16 __ocml_sin_2f16(__2f16);
-    __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
-    __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
-}
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_ldg.h b/third_party/rocm/include/hip/hcc_detail/hip_ldg.h
deleted file mode 100644
index ab86955..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_ldg.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H
-
-#if defined(__HCC_OR_HIP_CLANG__)
-#if __hcc_workweek__ >= 16164 || __HIP_CLANG_ONLY__
-#include "hip_vector_types.h"
-#include "host_defines.h"
-
-__device__ inline static char __ldg(const char* ptr) { return *ptr; }
-
-__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; }
-
-__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; }
-
-__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; }
-
-
-__device__ inline static short __ldg(const short* ptr) { return ptr[0]; }
-
-__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; }
-
-__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; }
-
-
-__device__ inline static int __ldg(const int* ptr) { return ptr[0]; }
-
-__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; }
-
-__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; }
-
-
-__device__ inline static long __ldg(const long* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; }
-
-
-__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; }
-
-__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; }
-
-__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; }
-
-
-__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; }
-
-__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; }
-
-
-__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; }
-
-
-__device__ inline static uint2 __ldg(const uint2* ptr) { return ptr[0]; }
-
-__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; }
-
-
-__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; }
-
-
-__device__ inline static float __ldg(const float* ptr) { return ptr[0]; }
-
-__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; }
-
-__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; }
-
-
-__device__ inline static double __ldg(const double* ptr) { return ptr[0]; }
-
-__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; }
-
-#endif  // __hcc_workweek__ || __HIP_CLANG_ONLY__
-
-#endif  // defined(__HCC_OR_HIP_CLANG__)
-
-#endif  // HIP_LDG_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_memory.h b/third_party/rocm/include/hip/hcc_detail/hip_memory.h
deleted file mode 100644
index 0c00614..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_memory.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
-
-// Implementation of malloc and free device functions.
-// HIP heap is implemented as a global array with fixed size. Users may define
-// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap.
-
-#if (__HCC__ || __HIP__) && __HIP_ENABLE_DEVICE_MALLOC__
-
-// Size of page in bytes.
-#ifndef __HIP_SIZE_OF_PAGE
-#define __HIP_SIZE_OF_PAGE 64
-#endif
-
-// Total number of pages
-#ifndef __HIP_NUM_PAGES
-#define __HIP_NUM_PAGES (16 * 64 * 64)
-#endif
-
-#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE)
-
-#if __HIP__ && __HIP_DEVICE_COMPILE__
-__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
-__attribute__((weak)) __device__
-    uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
-#else
-extern __device__ char __hip_device_heap[];
-extern __device__ uint32_t __hip_device_page_flag[];
-#endif
-
-extern "C" inline __device__ void* __hip_malloc(size_t size) {
-    char* heap = (char*)__hip_device_heap;
-    if (size > __HIP_SIZE_OF_HEAP) {
-        return (void*)nullptr;
-    }
-    uint32_t totalThreads =
-        hipBlockDim_x * hipGridDim_x * hipBlockDim_y
-        * hipGridDim_y * hipBlockDim_z * hipGridDim_z;
-    uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
-        + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
-        + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
-        * hipBlockDim_y;
-
-    uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads;
-    uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads;
-
-    uint32_t stride = size / __HIP_SIZE_OF_PAGE;
-    uint32_t start = numHeapsPerWorkItem * currentWorkItem;
-
-    uint32_t k = 0;
-
-    while (__hip_device_page_flag[k] > 0) {
-        k++;
-    }
-
-    for (uint32_t i = 0; i < stride - 1; i++) {
-        __hip_device_page_flag[i + start + k] = 1;
-    }
-
-    __hip_device_page_flag[start + stride - 1 + k] = 2;
-
-    void* ptr = (void*)(heap
-        + heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE);
-
-    return ptr;
-}
-
-extern "C" inline __device__ void* __hip_free(void* ptr) {
-    if (ptr == nullptr) {
-        return nullptr;
-    }
-
-    uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap;
-    uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE;
-
-    while (__hip_device_page_flag[offsetPage] != 0) {
-        if (__hip_device_page_flag[offsetPage] == 2) {
-            __hip_device_page_flag[offsetPage] = 0;
-            offsetPage++;
-            break;
-        } else {
-            __hip_device_page_flag[offsetPage] = 0;
-            offsetPage++;
-        }
-    }
-
-    return nullptr;
-}
-
-#endif
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_prof_str.h b/third_party/rocm/include/hip/hcc_detail/hip_prof_str.h
deleted file mode 100644
index cb297b2..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_prof_str.h
+++ /dev/null
@@ -1,5127 +0,0 @@
-// automatically generated sources
-#ifndef _HIP_PROF_STR_H
-#define _HIP_PROF_STR_H
-#define HIP_PROF_VER 1
-
-// Dummy API primitives
-#define INIT_NONE_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetBorderColor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipArrayGetDescriptor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectGetResourceViewDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyAtoHAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipDestroyTextureObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipArray3DGetDescriptor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetAddress_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipArrayDestroy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMaxAnisotropy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmapFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipDeviceGetCount_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyArrayToArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTexture2D_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipCreateTextureObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyHtoAAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyAtoA_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyAtoD_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTextureToMipmappedArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmapLevelClamp_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTextureToArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetFlags_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetFormat_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectGetTextureDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectDestroy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpy2DArrayToArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMipmappedArrayDestroy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetFormat_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyToArrayAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetAddress2D_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureObjectResourceViewDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetFlags_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipUnbindTexture_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmapLevelBias_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureAlignmentOffset_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMipmappedArrayGetLevel_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipCreateSurfaceObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMipmappedArrayCreate_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectGetResourceDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetChannelDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetAddressMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureObjectResourceDesc_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipModuleLaunchKernelExt_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpy2DToArrayAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetBorderColor_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipDestroySurfaceObject_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmapFilterMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMaxAnisotropy_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexObjectCreate_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetAddressMode_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmapLevelBias_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipMemcpyFromArrayAsync_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipBindTexture_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmappedArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefGetMipmappedArray_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipSetValidDevices_CB_ARGS_DATA(cb_data) {};
-#define INIT_ihipModuleLaunchKernel_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipTexRefSetMipmapLevelClamp_CB_ARGS_DATA(cb_data) {};
-#define INIT_hipGetTextureObjectTextureDesc_CB_ARGS_DATA(cb_data) {};
-
-// HIP API callbacks ID enumaration
-enum hip_api_id_t {
-  HIP_API_ID_hipDrvMemcpy3DAsync = 0,
-  HIP_API_ID_hipDeviceEnablePeerAccess = 1,
-  HIP_API_ID_hipFuncSetSharedMemConfig = 2,
-  HIP_API_ID_hipMemcpyToSymbolAsync = 3,
-  HIP_API_ID_hipMallocPitch = 4,
-  HIP_API_ID_hipMalloc = 5,
-  HIP_API_ID_hipMemsetD16 = 6,
-  HIP_API_ID_hipExtStreamGetCUMask = 7,
-  HIP_API_ID_hipEventRecord = 8,
-  HIP_API_ID_hipCtxSynchronize = 9,
-  HIP_API_ID_hipSetDevice = 10,
-  HIP_API_ID_hipCtxGetApiVersion = 11,
-  HIP_API_ID_hipMemcpyFromSymbolAsync = 12,
-  HIP_API_ID_hipExtGetLinkTypeAndHopCount = 13,
-  HIP_API_ID___hipPopCallConfiguration = 14,
-  HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor = 15,
-  HIP_API_ID_hipMemset3D = 16,
-  HIP_API_ID_hipStreamCreateWithPriority = 17,
-  HIP_API_ID_hipMemcpy2DToArray = 18,
-  HIP_API_ID_hipMemsetD8Async = 19,
-  HIP_API_ID_hipCtxGetCacheConfig = 20,
-  HIP_API_ID_hipModuleGetFunction = 21,
-  HIP_API_ID_hipStreamWaitEvent = 22,
-  HIP_API_ID_hipDeviceGetStreamPriorityRange = 23,
-  HIP_API_ID_hipModuleLoad = 24,
-  HIP_API_ID_hipDevicePrimaryCtxSetFlags = 25,
-  HIP_API_ID_hipLaunchCooperativeKernel = 26,
-  HIP_API_ID_hipLaunchCooperativeKernelMultiDevice = 27,
-  HIP_API_ID_hipMemcpyAsync = 28,
-  HIP_API_ID_hipMalloc3DArray = 29,
-  HIP_API_ID_hipMallocHost = 30,
-  HIP_API_ID_hipCtxGetCurrent = 31,
-  HIP_API_ID_hipDevicePrimaryCtxGetState = 32,
-  HIP_API_ID_hipEventQuery = 33,
-  HIP_API_ID_hipEventCreate = 34,
-  HIP_API_ID_hipMemGetAddressRange = 35,
-  HIP_API_ID_hipMemcpyFromSymbol = 36,
-  HIP_API_ID_hipArrayCreate = 37,
-  HIP_API_ID_hipStreamAttachMemAsync = 38,
-  HIP_API_ID_hipStreamGetFlags = 39,
-  HIP_API_ID_hipMallocArray = 40,
-  HIP_API_ID_hipCtxGetSharedMemConfig = 41,
-  HIP_API_ID_hipDeviceDisablePeerAccess = 42,
-  HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize = 43,
-  HIP_API_ID_hipMemPtrGetInfo = 44,
-  HIP_API_ID_hipFuncGetAttribute = 45,
-  HIP_API_ID_hipCtxGetFlags = 46,
-  HIP_API_ID_hipStreamDestroy = 47,
-  HIP_API_ID___hipPushCallConfiguration = 48,
-  HIP_API_ID_hipMemset3DAsync = 49,
-  HIP_API_ID_hipDeviceGetPCIBusId = 50,
-  HIP_API_ID_hipInit = 51,
-  HIP_API_ID_hipMemcpyAtoH = 52,
-  HIP_API_ID_hipStreamGetPriority = 53,
-  HIP_API_ID_hipMemset2D = 54,
-  HIP_API_ID_hipMemset2DAsync = 55,
-  HIP_API_ID_hipDeviceCanAccessPeer = 56,
-  HIP_API_ID_hipLaunchByPtr = 57,
-  HIP_API_ID_hipMemPrefetchAsync = 58,
-  HIP_API_ID_hipCtxDestroy = 59,
-  HIP_API_ID_hipMemsetD16Async = 60,
-  HIP_API_ID_hipModuleUnload = 61,
-  HIP_API_ID_hipHostUnregister = 62,
-  HIP_API_ID_hipProfilerStop = 63,
-  HIP_API_ID_hipExtStreamCreateWithCUMask = 64,
-  HIP_API_ID_hipStreamSynchronize = 65,
-  HIP_API_ID_hipFreeHost = 66,
-  HIP_API_ID_hipDeviceSetCacheConfig = 67,
-  HIP_API_ID_hipGetErrorName = 68,
-  HIP_API_ID_hipMemcpyHtoD = 69,
-  HIP_API_ID_hipModuleGetGlobal = 70,
-  HIP_API_ID_hipMemcpyHtoA = 71,
-  HIP_API_ID_hipCtxCreate = 72,
-  HIP_API_ID_hipMemcpy2D = 73,
-  HIP_API_ID_hipIpcCloseMemHandle = 74,
-  HIP_API_ID_hipChooseDevice = 75,
-  HIP_API_ID_hipDeviceSetSharedMemConfig = 76,
-  HIP_API_ID_hipMallocMipmappedArray = 77,
-  HIP_API_ID_hipSetupArgument = 78,
-  HIP_API_ID_hipIpcGetEventHandle = 79,
-  HIP_API_ID_hipFreeArray = 80,
-  HIP_API_ID_hipCtxSetCacheConfig = 81,
-  HIP_API_ID_hipFuncSetCacheConfig = 82,
-  HIP_API_ID_hipLaunchKernel = 83,
-  HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 84,
-  HIP_API_ID_hipModuleGetTexRef = 85,
-  HIP_API_ID_hipFuncSetAttribute = 86,
-  HIP_API_ID_hipEventElapsedTime = 87,
-  HIP_API_ID_hipConfigureCall = 88,
-  HIP_API_ID_hipMemAdvise = 89,
-  HIP_API_ID_hipMemcpy3DAsync = 90,
-  HIP_API_ID_hipEventDestroy = 91,
-  HIP_API_ID_hipCtxPopCurrent = 92,
-  HIP_API_ID_hipGetSymbolAddress = 93,
-  HIP_API_ID_hipHostGetFlags = 94,
-  HIP_API_ID_hipHostMalloc = 95,
-  HIP_API_ID_hipCtxSetSharedMemConfig = 96,
-  HIP_API_ID_hipFreeMipmappedArray = 97,
-  HIP_API_ID_hipMemGetInfo = 98,
-  HIP_API_ID_hipDeviceReset = 99,
-  HIP_API_ID_hipMemset = 100,
-  HIP_API_ID_hipMemsetD8 = 101,
-  HIP_API_ID_hipMemcpyParam2DAsync = 102,
-  HIP_API_ID_hipHostRegister = 103,
-  HIP_API_ID_hipDriverGetVersion = 104,
-  HIP_API_ID_hipArray3DCreate = 105,
-  HIP_API_ID_hipIpcOpenMemHandle = 106,
-  HIP_API_ID_hipGetLastError = 107,
-  HIP_API_ID_hipGetDeviceFlags = 108,
-  HIP_API_ID_hipDeviceGetSharedMemConfig = 109,
-  HIP_API_ID_hipDrvMemcpy3D = 110,
-  HIP_API_ID_hipMemcpy2DFromArray = 111,
-  HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 112,
-  HIP_API_ID_hipSetDeviceFlags = 113,
-  HIP_API_ID_hipHccModuleLaunchKernel = 114,
-  HIP_API_ID_hipFree = 115,
-  HIP_API_ID_hipOccupancyMaxPotentialBlockSize = 116,
-  HIP_API_ID_hipDeviceGetAttribute = 117,
-  HIP_API_ID_hipDeviceComputeCapability = 118,
-  HIP_API_ID_hipCtxDisablePeerAccess = 119,
-  HIP_API_ID_hipMallocManaged = 120,
-  HIP_API_ID_hipDeviceGetByPCIBusId = 121,
-  HIP_API_ID_hipIpcGetMemHandle = 122,
-  HIP_API_ID_hipMemcpyHtoDAsync = 123,
-  HIP_API_ID_hipCtxGetDevice = 124,
-  HIP_API_ID_hipMemcpyDtoD = 125,
-  HIP_API_ID_hipModuleLoadData = 126,
-  HIP_API_ID_hipDevicePrimaryCtxRelease = 127,
-  HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor = 128,
-  HIP_API_ID_hipCtxSetCurrent = 129,
-  HIP_API_ID_hipGetErrorString = 130,
-  HIP_API_ID_hipStreamCreate = 131,
-  HIP_API_ID_hipDevicePrimaryCtxRetain = 132,
-  HIP_API_ID_hipDeviceGet = 133,
-  HIP_API_ID_hipStreamCreateWithFlags = 134,
-  HIP_API_ID_hipMemcpyFromArray = 135,
-  HIP_API_ID_hipMemcpy2DAsync = 136,
-  HIP_API_ID_hipFuncGetAttributes = 137,
-  HIP_API_ID_hipGetSymbolSize = 138,
-  HIP_API_ID_hipHostFree = 139,
-  HIP_API_ID_hipEventCreateWithFlags = 140,
-  HIP_API_ID_hipStreamQuery = 141,
-  HIP_API_ID_hipMemcpy3D = 142,
-  HIP_API_ID_hipMemcpyToSymbol = 143,
-  HIP_API_ID_hipMemcpy = 144,
-  HIP_API_ID_hipPeekAtLastError = 145,
-  HIP_API_ID_hipExtLaunchMultiKernelMultiDevice = 146,
-  HIP_API_ID_hipHostAlloc = 147,
-  HIP_API_ID_hipStreamAddCallback = 148,
-  HIP_API_ID_hipMemcpyToArray = 149,
-  HIP_API_ID_hipMemsetD32 = 150,
-  HIP_API_ID_hipExtModuleLaunchKernel = 151,
-  HIP_API_ID_hipDeviceSynchronize = 152,
-  HIP_API_ID_hipDeviceGetCacheConfig = 153,
-  HIP_API_ID_hipMalloc3D = 154,
-  HIP_API_ID_hipPointerGetAttributes = 155,
-  HIP_API_ID_hipMemsetAsync = 156,
-  HIP_API_ID_hipDeviceGetName = 157,
-  HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags = 158,
-  HIP_API_ID_hipCtxPushCurrent = 159,
-  HIP_API_ID_hipMemcpyPeer = 160,
-  HIP_API_ID_hipEventSynchronize = 161,
-  HIP_API_ID_hipMemcpyDtoDAsync = 162,
-  HIP_API_ID_hipProfilerStart = 163,
-  HIP_API_ID_hipExtMallocWithFlags = 164,
-  HIP_API_ID_hipCtxEnablePeerAccess = 165,
-  HIP_API_ID_hipMemAllocHost = 166,
-  HIP_API_ID_hipMemcpyDtoHAsync = 167,
-  HIP_API_ID_hipModuleLaunchKernel = 168,
-  HIP_API_ID_hipMemAllocPitch = 169,
-  HIP_API_ID_hipExtLaunchKernel = 170,
-  HIP_API_ID_hipMemcpy2DFromArrayAsync = 171,
-  HIP_API_ID_hipDeviceGetLimit = 172,
-  HIP_API_ID_hipModuleLoadDataEx = 173,
-  HIP_API_ID_hipRuntimeGetVersion = 174,
-  HIP_API_ID_hipMemRangeGetAttribute = 175,
-  HIP_API_ID_hipDeviceGetP2PAttribute = 176,
-  HIP_API_ID_hipMemcpyPeerAsync = 177,
-  HIP_API_ID_hipGetDeviceProperties = 178,
-  HIP_API_ID_hipMemcpyDtoH = 179,
-  HIP_API_ID_hipMemcpyWithStream = 180,
-  HIP_API_ID_hipDeviceTotalMem = 181,
-  HIP_API_ID_hipHostGetDevicePointer = 182,
-  HIP_API_ID_hipMemRangeGetAttributes = 183,
-  HIP_API_ID_hipMemcpyParam2D = 184,
-  HIP_API_ID_hipDevicePrimaryCtxReset = 185,
-  HIP_API_ID_hipGetMipmappedArrayLevel = 186,
-  HIP_API_ID_hipMemsetD32Async = 187,
-  HIP_API_ID_hipGetDevice = 188,
-  HIP_API_ID_hipGetDeviceCount = 189,
-  HIP_API_ID_hipIpcOpenEventHandle = 190,
-  HIP_API_ID_NUMBER = 191,
-
-  HIP_API_ID_NONE = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetAddress = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetBorderColor = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyDtoA = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipArrayGetDescriptor = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexObjectGetResourceViewDesc = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyAtoHAsync = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipDestroyTextureObject = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipArray3DGetDescriptor = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetAddress = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipArrayDestroy = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetMaxAnisotropy = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetMipmapFilterMode = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipDeviceGetCount = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyArrayToArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipBindTexture2D = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipCreateTextureObject = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyHtoAAsync = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyAtoA = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyAtoD = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipBindTextureToMipmappedArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetMipmapLevelClamp = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipBindTextureToArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetFlags = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetFormat = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexObjectGetTextureDesc = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexObjectDestroy = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpy2DArrayToArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipGetTextureReference = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMipmappedArrayDestroy = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetFilterMode = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetFormat = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyToArrayAsync = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetAddress2D = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipGetTextureObjectResourceViewDesc = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetFlags = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipUnbindTexture = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetMipmapLevelBias = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetFilterMode = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipGetTextureAlignmentOffset = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMipmappedArrayGetLevel = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipCreateSurfaceObject = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMipmappedArrayCreate = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexObjectGetResourceDesc = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipGetChannelDesc = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetAddressMode = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipGetTextureObjectResourceDesc = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipModuleLaunchKernelExt = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpy2DToArrayAsync = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetBorderColor = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipDestroySurfaceObject = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetMipmapFilterMode = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetMaxAnisotropy = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexObjectCreate = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetAddressMode = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetMipmapLevelBias = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipMemcpyFromArrayAsync = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipBindTexture = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetMipmappedArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefGetMipmappedArray = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipSetValidDevices = HIP_API_ID_NUMBER,
-  HIP_API_ID_ihipModuleLaunchKernel = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipTexRefSetMipmapLevelClamp = HIP_API_ID_NUMBER,
-  HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NUMBER,
-};
-
-// Return HIP API string by given ID
-static inline const char* hip_api_name(const uint32_t id) {
-  switch(id) {
-    case HIP_API_ID_hipDrvMemcpy3DAsync: return "hipDrvMemcpy3DAsync";
-    case HIP_API_ID_hipDeviceEnablePeerAccess: return "hipDeviceEnablePeerAccess";
-    case HIP_API_ID_hipFuncSetSharedMemConfig: return "hipFuncSetSharedMemConfig";
-    case HIP_API_ID_hipMemcpyToSymbolAsync: return "hipMemcpyToSymbolAsync";
-    case HIP_API_ID_hipMallocPitch: return "hipMallocPitch";
-    case HIP_API_ID_hipMalloc: return "hipMalloc";
-    case HIP_API_ID_hipMemsetD16: return "hipMemsetD16";
-    case HIP_API_ID_hipExtStreamGetCUMask: return "hipExtStreamGetCUMask";
-    case HIP_API_ID_hipEventRecord: return "hipEventRecord";
-    case HIP_API_ID_hipCtxSynchronize: return "hipCtxSynchronize";
-    case HIP_API_ID_hipSetDevice: return "hipSetDevice";
-    case HIP_API_ID_hipCtxGetApiVersion: return "hipCtxGetApiVersion";
-    case HIP_API_ID_hipMemcpyFromSymbolAsync: return "hipMemcpyFromSymbolAsync";
-    case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount";
-    case HIP_API_ID___hipPopCallConfiguration: return "__hipPopCallConfiguration";
-    case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: return "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor";
-    case HIP_API_ID_hipMemset3D: return "hipMemset3D";
-    case HIP_API_ID_hipStreamCreateWithPriority: return "hipStreamCreateWithPriority";
-    case HIP_API_ID_hipMemcpy2DToArray: return "hipMemcpy2DToArray";
-    case HIP_API_ID_hipMemsetD8Async: return "hipMemsetD8Async";
-    case HIP_API_ID_hipCtxGetCacheConfig: return "hipCtxGetCacheConfig";
-    case HIP_API_ID_hipModuleGetFunction: return "hipModuleGetFunction";
-    case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent";
-    case HIP_API_ID_hipDeviceGetStreamPriorityRange: return "hipDeviceGetStreamPriorityRange";
-    case HIP_API_ID_hipModuleLoad: return "hipModuleLoad";
-    case HIP_API_ID_hipDevicePrimaryCtxSetFlags: return "hipDevicePrimaryCtxSetFlags";
-    case HIP_API_ID_hipLaunchCooperativeKernel: return "hipLaunchCooperativeKernel";
-    case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: return "hipLaunchCooperativeKernelMultiDevice";
-    case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync";
-    case HIP_API_ID_hipMalloc3DArray: return "hipMalloc3DArray";
-    case HIP_API_ID_hipMallocHost: return "hipMallocHost";
-    case HIP_API_ID_hipCtxGetCurrent: return "hipCtxGetCurrent";
-    case HIP_API_ID_hipDevicePrimaryCtxGetState: return "hipDevicePrimaryCtxGetState";
-    case HIP_API_ID_hipEventQuery: return "hipEventQuery";
-    case HIP_API_ID_hipEventCreate: return "hipEventCreate";
-    case HIP_API_ID_hipMemGetAddressRange: return "hipMemGetAddressRange";
-    case HIP_API_ID_hipMemcpyFromSymbol: return "hipMemcpyFromSymbol";
-    case HIP_API_ID_hipArrayCreate: return "hipArrayCreate";
-    case HIP_API_ID_hipStreamAttachMemAsync: return "hipStreamAttachMemAsync";
-    case HIP_API_ID_hipStreamGetFlags: return "hipStreamGetFlags";
-    case HIP_API_ID_hipMallocArray: return "hipMallocArray";
-    case HIP_API_ID_hipCtxGetSharedMemConfig: return "hipCtxGetSharedMemConfig";
-    case HIP_API_ID_hipDeviceDisablePeerAccess: return "hipDeviceDisablePeerAccess";
-    case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: return "hipModuleOccupancyMaxPotentialBlockSize";
-    case HIP_API_ID_hipMemPtrGetInfo: return "hipMemPtrGetInfo";
-    case HIP_API_ID_hipFuncGetAttribute: return "hipFuncGetAttribute";
-    case HIP_API_ID_hipCtxGetFlags: return "hipCtxGetFlags";
-    case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy";
-    case HIP_API_ID___hipPushCallConfiguration: return "__hipPushCallConfiguration";
-    case HIP_API_ID_hipMemset3DAsync: return "hipMemset3DAsync";
-    case HIP_API_ID_hipDeviceGetPCIBusId: return "hipDeviceGetPCIBusId";
-    case HIP_API_ID_hipInit: return "hipInit";
-    case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH";
-    case HIP_API_ID_hipStreamGetPriority: return "hipStreamGetPriority";
-    case HIP_API_ID_hipMemset2D: return "hipMemset2D";
-    case HIP_API_ID_hipMemset2DAsync: return "hipMemset2DAsync";
-    case HIP_API_ID_hipDeviceCanAccessPeer: return "hipDeviceCanAccessPeer";
-    case HIP_API_ID_hipLaunchByPtr: return "hipLaunchByPtr";
-    case HIP_API_ID_hipMemPrefetchAsync: return "hipMemPrefetchAsync";
-    case HIP_API_ID_hipCtxDestroy: return "hipCtxDestroy";
-    case HIP_API_ID_hipMemsetD16Async: return "hipMemsetD16Async";
-    case HIP_API_ID_hipModuleUnload: return "hipModuleUnload";
-    case HIP_API_ID_hipHostUnregister: return "hipHostUnregister";
-    case HIP_API_ID_hipProfilerStop: return "hipProfilerStop";
-    case HIP_API_ID_hipExtStreamCreateWithCUMask: return "hipExtStreamCreateWithCUMask";
-    case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize";
-    case HIP_API_ID_hipFreeHost: return "hipFreeHost";
-    case HIP_API_ID_hipDeviceSetCacheConfig: return "hipDeviceSetCacheConfig";
-    case HIP_API_ID_hipGetErrorName: return "hipGetErrorName";
-    case HIP_API_ID_hipMemcpyHtoD: return "hipMemcpyHtoD";
-    case HIP_API_ID_hipModuleGetGlobal: return "hipModuleGetGlobal";
-    case HIP_API_ID_hipMemcpyHtoA: return "hipMemcpyHtoA";
-    case HIP_API_ID_hipCtxCreate: return "hipCtxCreate";
-    case HIP_API_ID_hipMemcpy2D: return "hipMemcpy2D";
-    case HIP_API_ID_hipIpcCloseMemHandle: return "hipIpcCloseMemHandle";
-    case HIP_API_ID_hipChooseDevice: return "hipChooseDevice";
-    case HIP_API_ID_hipDeviceSetSharedMemConfig: return "hipDeviceSetSharedMemConfig";
-    case HIP_API_ID_hipMallocMipmappedArray: return "hipMallocMipmappedArray";
-    case HIP_API_ID_hipSetupArgument: return "hipSetupArgument";
-    case HIP_API_ID_hipIpcGetEventHandle: return "hipIpcGetEventHandle";
-    case HIP_API_ID_hipFreeArray: return "hipFreeArray";
-    case HIP_API_ID_hipCtxSetCacheConfig: return "hipCtxSetCacheConfig";
-    case HIP_API_ID_hipFuncSetCacheConfig: return "hipFuncSetCacheConfig";
-    case HIP_API_ID_hipLaunchKernel: return "hipLaunchKernel";
-    case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags";
-    case HIP_API_ID_hipModuleGetTexRef: return "hipModuleGetTexRef";
-    case HIP_API_ID_hipFuncSetAttribute: return "hipFuncSetAttribute";
-    case HIP_API_ID_hipEventElapsedTime: return "hipEventElapsedTime";
-    case HIP_API_ID_hipConfigureCall: return "hipConfigureCall";
-    case HIP_API_ID_hipMemAdvise: return "hipMemAdvise";
-    case HIP_API_ID_hipMemcpy3DAsync: return "hipMemcpy3DAsync";
-    case HIP_API_ID_hipEventDestroy: return "hipEventDestroy";
-    case HIP_API_ID_hipCtxPopCurrent: return "hipCtxPopCurrent";
-    case HIP_API_ID_hipGetSymbolAddress: return "hipGetSymbolAddress";
-    case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags";
-    case HIP_API_ID_hipHostMalloc: return "hipHostMalloc";
-    case HIP_API_ID_hipCtxSetSharedMemConfig: return "hipCtxSetSharedMemConfig";
-    case HIP_API_ID_hipFreeMipmappedArray: return "hipFreeMipmappedArray";
-    case HIP_API_ID_hipMemGetInfo: return "hipMemGetInfo";
-    case HIP_API_ID_hipDeviceReset: return "hipDeviceReset";
-    case HIP_API_ID_hipMemset: return "hipMemset";
-    case HIP_API_ID_hipMemsetD8: return "hipMemsetD8";
-    case HIP_API_ID_hipMemcpyParam2DAsync: return "hipMemcpyParam2DAsync";
-    case HIP_API_ID_hipHostRegister: return "hipHostRegister";
-    case HIP_API_ID_hipDriverGetVersion: return "hipDriverGetVersion";
-    case HIP_API_ID_hipArray3DCreate: return "hipArray3DCreate";
-    case HIP_API_ID_hipIpcOpenMemHandle: return "hipIpcOpenMemHandle";
-    case HIP_API_ID_hipGetLastError: return "hipGetLastError";
-    case HIP_API_ID_hipGetDeviceFlags: return "hipGetDeviceFlags";
-    case HIP_API_ID_hipDeviceGetSharedMemConfig: return "hipDeviceGetSharedMemConfig";
-    case HIP_API_ID_hipDrvMemcpy3D: return "hipDrvMemcpy3D";
-    case HIP_API_ID_hipMemcpy2DFromArray: return "hipMemcpy2DFromArray";
-    case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags";
-    case HIP_API_ID_hipSetDeviceFlags: return "hipSetDeviceFlags";
-    case HIP_API_ID_hipHccModuleLaunchKernel: return "hipHccModuleLaunchKernel";
-    case HIP_API_ID_hipFree: return "hipFree";
-    case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: return "hipOccupancyMaxPotentialBlockSize";
-    case HIP_API_ID_hipDeviceGetAttribute: return "hipDeviceGetAttribute";
-    case HIP_API_ID_hipDeviceComputeCapability: return "hipDeviceComputeCapability";
-    case HIP_API_ID_hipCtxDisablePeerAccess: return "hipCtxDisablePeerAccess";
-    case HIP_API_ID_hipMallocManaged: return "hipMallocManaged";
-    case HIP_API_ID_hipDeviceGetByPCIBusId: return "hipDeviceGetByPCIBusId";
-    case HIP_API_ID_hipIpcGetMemHandle: return "hipIpcGetMemHandle";
-    case HIP_API_ID_hipMemcpyHtoDAsync: return "hipMemcpyHtoDAsync";
-    case HIP_API_ID_hipCtxGetDevice: return "hipCtxGetDevice";
-    case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD";
-    case HIP_API_ID_hipModuleLoadData: return "hipModuleLoadData";
-    case HIP_API_ID_hipDevicePrimaryCtxRelease: return "hipDevicePrimaryCtxRelease";
-    case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: return "hipOccupancyMaxActiveBlocksPerMultiprocessor";
-    case HIP_API_ID_hipCtxSetCurrent: return "hipCtxSetCurrent";
-    case HIP_API_ID_hipGetErrorString: return "hipGetErrorString";
-    case HIP_API_ID_hipStreamCreate: return "hipStreamCreate";
-    case HIP_API_ID_hipDevicePrimaryCtxRetain: return "hipDevicePrimaryCtxRetain";
-    case HIP_API_ID_hipDeviceGet: return "hipDeviceGet";
-    case HIP_API_ID_hipStreamCreateWithFlags: return "hipStreamCreateWithFlags";
-    case HIP_API_ID_hipMemcpyFromArray: return "hipMemcpyFromArray";
-    case HIP_API_ID_hipMemcpy2DAsync: return "hipMemcpy2DAsync";
-    case HIP_API_ID_hipFuncGetAttributes: return "hipFuncGetAttributes";
-    case HIP_API_ID_hipGetSymbolSize: return "hipGetSymbolSize";
-    case HIP_API_ID_hipHostFree: return "hipHostFree";
-    case HIP_API_ID_hipEventCreateWithFlags: return "hipEventCreateWithFlags";
-    case HIP_API_ID_hipStreamQuery: return "hipStreamQuery";
-    case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D";
-    case HIP_API_ID_hipMemcpyToSymbol: return "hipMemcpyToSymbol";
-    case HIP_API_ID_hipMemcpy: return "hipMemcpy";
-    case HIP_API_ID_hipPeekAtLastError: return "hipPeekAtLastError";
-    case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice";
-    case HIP_API_ID_hipHostAlloc: return "hipHostAlloc";
-    case HIP_API_ID_hipStreamAddCallback: return "hipStreamAddCallback";
-    case HIP_API_ID_hipMemcpyToArray: return "hipMemcpyToArray";
-    case HIP_API_ID_hipMemsetD32: return "hipMemsetD32";
-    case HIP_API_ID_hipExtModuleLaunchKernel: return "hipExtModuleLaunchKernel";
-    case HIP_API_ID_hipDeviceSynchronize: return "hipDeviceSynchronize";
-    case HIP_API_ID_hipDeviceGetCacheConfig: return "hipDeviceGetCacheConfig";
-    case HIP_API_ID_hipMalloc3D: return "hipMalloc3D";
-    case HIP_API_ID_hipPointerGetAttributes: return "hipPointerGetAttributes";
-    case HIP_API_ID_hipMemsetAsync: return "hipMemsetAsync";
-    case HIP_API_ID_hipDeviceGetName: return "hipDeviceGetName";
-    case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: return "hipModuleOccupancyMaxPotentialBlockSizeWithFlags";
-    case HIP_API_ID_hipCtxPushCurrent: return "hipCtxPushCurrent";
-    case HIP_API_ID_hipMemcpyPeer: return "hipMemcpyPeer";
-    case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize";
-    case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync";
-    case HIP_API_ID_hipProfilerStart: return "hipProfilerStart";
-    case HIP_API_ID_hipExtMallocWithFlags: return "hipExtMallocWithFlags";
-    case HIP_API_ID_hipCtxEnablePeerAccess: return "hipCtxEnablePeerAccess";
-    case HIP_API_ID_hipMemAllocHost: return "hipMemAllocHost";
-    case HIP_API_ID_hipMemcpyDtoHAsync: return "hipMemcpyDtoHAsync";
-    case HIP_API_ID_hipModuleLaunchKernel: return "hipModuleLaunchKernel";
-    case HIP_API_ID_hipMemAllocPitch: return "hipMemAllocPitch";
-    case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel";
-    case HIP_API_ID_hipMemcpy2DFromArrayAsync: return "hipMemcpy2DFromArrayAsync";
-    case HIP_API_ID_hipDeviceGetLimit: return "hipDeviceGetLimit";
-    case HIP_API_ID_hipModuleLoadDataEx: return "hipModuleLoadDataEx";
-    case HIP_API_ID_hipRuntimeGetVersion: return "hipRuntimeGetVersion";
-    case HIP_API_ID_hipMemRangeGetAttribute: return "hipMemRangeGetAttribute";
-    case HIP_API_ID_hipDeviceGetP2PAttribute: return "hipDeviceGetP2PAttribute";
-    case HIP_API_ID_hipMemcpyPeerAsync: return "hipMemcpyPeerAsync";
-    case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties";
-    case HIP_API_ID_hipMemcpyDtoH: return "hipMemcpyDtoH";
-    case HIP_API_ID_hipMemcpyWithStream: return "hipMemcpyWithStream";
-    case HIP_API_ID_hipDeviceTotalMem: return "hipDeviceTotalMem";
-    case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer";
-    case HIP_API_ID_hipMemRangeGetAttributes: return "hipMemRangeGetAttributes";
-    case HIP_API_ID_hipMemcpyParam2D: return "hipMemcpyParam2D";
-    case HIP_API_ID_hipDevicePrimaryCtxReset: return "hipDevicePrimaryCtxReset";
-    case HIP_API_ID_hipGetMipmappedArrayLevel: return "hipGetMipmappedArrayLevel";
-    case HIP_API_ID_hipMemsetD32Async: return "hipMemsetD32Async";
-    case HIP_API_ID_hipGetDevice: return "hipGetDevice";
-    case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount";
-    case HIP_API_ID_hipIpcOpenEventHandle: return "hipIpcOpenEventHandle";
-  };
-  return "unknown";
-};
-
-#include <string.h>
-// Return HIP API ID by given name
-static inline uint32_t hipApiIdByName(const char* name) {
-  if (strcmp("hipDrvMemcpy3DAsync", name) == 0) return HIP_API_ID_hipDrvMemcpy3DAsync;
-  if (strcmp("hipDeviceEnablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceEnablePeerAccess;
-  if (strcmp("hipFuncSetSharedMemConfig", name) == 0) return HIP_API_ID_hipFuncSetSharedMemConfig;
-  if (strcmp("hipMemcpyToSymbolAsync", name) == 0) return HIP_API_ID_hipMemcpyToSymbolAsync;
-  if (strcmp("hipMallocPitch", name) == 0) return HIP_API_ID_hipMallocPitch;
-  if (strcmp("hipMalloc", name) == 0) return HIP_API_ID_hipMalloc;
-  if (strcmp("hipMemsetD16", name) == 0) return HIP_API_ID_hipMemsetD16;
-  if (strcmp("hipExtStreamGetCUMask", name) == 0) return HIP_API_ID_hipExtStreamGetCUMask;
-  if (strcmp("hipEventRecord", name) == 0) return HIP_API_ID_hipEventRecord;
-  if (strcmp("hipCtxSynchronize", name) == 0) return HIP_API_ID_hipCtxSynchronize;
-  if (strcmp("hipSetDevice", name) == 0) return HIP_API_ID_hipSetDevice;
-  if (strcmp("hipCtxGetApiVersion", name) == 0) return HIP_API_ID_hipCtxGetApiVersion;
-  if (strcmp("hipMemcpyFromSymbolAsync", name) == 0) return HIP_API_ID_hipMemcpyFromSymbolAsync;
-  if (strcmp("hipExtGetLinkTypeAndHopCount", name) == 0) return HIP_API_ID_hipExtGetLinkTypeAndHopCount;
-  if (strcmp("__hipPopCallConfiguration", name) == 0) return HIP_API_ID___hipPopCallConfiguration;
-  if (strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor;
-  if (strcmp("hipMemset3D", name) == 0) return HIP_API_ID_hipMemset3D;
-  if (strcmp("hipStreamCreateWithPriority", name) == 0) return HIP_API_ID_hipStreamCreateWithPriority;
-  if (strcmp("hipMemcpy2DToArray", name) == 0) return HIP_API_ID_hipMemcpy2DToArray;
-  if (strcmp("hipMemsetD8Async", name) == 0) return HIP_API_ID_hipMemsetD8Async;
-  if (strcmp("hipCtxGetCacheConfig", name) == 0) return HIP_API_ID_hipCtxGetCacheConfig;
-  if (strcmp("hipModuleGetFunction", name) == 0) return HIP_API_ID_hipModuleGetFunction;
-  if (strcmp("hipStreamWaitEvent", name) == 0) return HIP_API_ID_hipStreamWaitEvent;
-  if (strcmp("hipDeviceGetStreamPriorityRange", name) == 0) return HIP_API_ID_hipDeviceGetStreamPriorityRange;
-  if (strcmp("hipModuleLoad", name) == 0) return HIP_API_ID_hipModuleLoad;
-  if (strcmp("hipDevicePrimaryCtxSetFlags", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxSetFlags;
-  if (strcmp("hipLaunchCooperativeKernel", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernel;
-  if (strcmp("hipLaunchCooperativeKernelMultiDevice", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernelMultiDevice;
-  if (strcmp("hipMemcpyAsync", name) == 0) return HIP_API_ID_hipMemcpyAsync;
-  if (strcmp("hipMalloc3DArray", name) == 0) return HIP_API_ID_hipMalloc3DArray;
-  if (strcmp("hipMallocHost", name) == 0) return HIP_API_ID_hipMallocHost;
-  if (strcmp("hipCtxGetCurrent", name) == 0) return HIP_API_ID_hipCtxGetCurrent;
-  if (strcmp("hipDevicePrimaryCtxGetState", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxGetState;
-  if (strcmp("hipEventQuery", name) == 0) return HIP_API_ID_hipEventQuery;
-  if (strcmp("hipEventCreate", name) == 0) return HIP_API_ID_hipEventCreate;
-  if (strcmp("hipMemGetAddressRange", name) == 0) return HIP_API_ID_hipMemGetAddressRange;
-  if (strcmp("hipMemcpyFromSymbol", name) == 0) return HIP_API_ID_hipMemcpyFromSymbol;
-  if (strcmp("hipArrayCreate", name) == 0) return HIP_API_ID_hipArrayCreate;
-  if (strcmp("hipStreamAttachMemAsync", name) == 0) return HIP_API_ID_hipStreamAttachMemAsync;
-  if (strcmp("hipStreamGetFlags", name) == 0) return HIP_API_ID_hipStreamGetFlags;
-  if (strcmp("hipMallocArray", name) == 0) return HIP_API_ID_hipMallocArray;
-  if (strcmp("hipCtxGetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxGetSharedMemConfig;
-  if (strcmp("hipDeviceDisablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceDisablePeerAccess;
-  if (strcmp("hipModuleOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize;
-  if (strcmp("hipMemPtrGetInfo", name) == 0) return HIP_API_ID_hipMemPtrGetInfo;
-  if (strcmp("hipFuncGetAttribute", name) == 0) return HIP_API_ID_hipFuncGetAttribute;
-  if (strcmp("hipCtxGetFlags", name) == 0) return HIP_API_ID_hipCtxGetFlags;
-  if (strcmp("hipStreamDestroy", name) == 0) return HIP_API_ID_hipStreamDestroy;
-  if (strcmp("__hipPushCallConfiguration", name) == 0) return HIP_API_ID___hipPushCallConfiguration;
-  if (strcmp("hipMemset3DAsync", name) == 0) return HIP_API_ID_hipMemset3DAsync;
-  if (strcmp("hipDeviceGetPCIBusId", name) == 0) return HIP_API_ID_hipDeviceGetPCIBusId;
-  if (strcmp("hipInit", name) == 0) return HIP_API_ID_hipInit;
-  if (strcmp("hipMemcpyAtoH", name) == 0) return HIP_API_ID_hipMemcpyAtoH;
-  if (strcmp("hipStreamGetPriority", name) == 0) return HIP_API_ID_hipStreamGetPriority;
-  if (strcmp("hipMemset2D", name) == 0) return HIP_API_ID_hipMemset2D;
-  if (strcmp("hipMemset2DAsync", name) == 0) return HIP_API_ID_hipMemset2DAsync;
-  if (strcmp("hipDeviceCanAccessPeer", name) == 0) return HIP_API_ID_hipDeviceCanAccessPeer;
-  if (strcmp("hipLaunchByPtr", name) == 0) return HIP_API_ID_hipLaunchByPtr;
-  if (strcmp("hipMemPrefetchAsync", name) == 0) return HIP_API_ID_hipMemPrefetchAsync;
-  if (strcmp("hipCtxDestroy", name) == 0) return HIP_API_ID_hipCtxDestroy;
-  if (strcmp("hipMemsetD16Async", name) == 0) return HIP_API_ID_hipMemsetD16Async;
-  if (strcmp("hipModuleUnload", name) == 0) return HIP_API_ID_hipModuleUnload;
-  if (strcmp("hipHostUnregister", name) == 0) return HIP_API_ID_hipHostUnregister;
-  if (strcmp("hipProfilerStop", name) == 0) return HIP_API_ID_hipProfilerStop;
-  if (strcmp("hipExtStreamCreateWithCUMask", name) == 0) return HIP_API_ID_hipExtStreamCreateWithCUMask;
-  if (strcmp("hipStreamSynchronize", name) == 0) return HIP_API_ID_hipStreamSynchronize;
-  if (strcmp("hipFreeHost", name) == 0) return HIP_API_ID_hipFreeHost;
-  if (strcmp("hipDeviceSetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceSetCacheConfig;
-  if (strcmp("hipGetErrorName", name) == 0) return HIP_API_ID_hipGetErrorName;
-  if (strcmp("hipMemcpyHtoD", name) == 0) return HIP_API_ID_hipMemcpyHtoD;
-  if (strcmp("hipModuleGetGlobal", name) == 0) return HIP_API_ID_hipModuleGetGlobal;
-  if (strcmp("hipMemcpyHtoA", name) == 0) return HIP_API_ID_hipMemcpyHtoA;
-  if (strcmp("hipCtxCreate", name) == 0) return HIP_API_ID_hipCtxCreate;
-  if (strcmp("hipMemcpy2D", name) == 0) return HIP_API_ID_hipMemcpy2D;
-  if (strcmp("hipIpcCloseMemHandle", name) == 0) return HIP_API_ID_hipIpcCloseMemHandle;
-  if (strcmp("hipChooseDevice", name) == 0) return HIP_API_ID_hipChooseDevice;
-  if (strcmp("hipDeviceSetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceSetSharedMemConfig;
-  if (strcmp("hipMallocMipmappedArray", name) == 0) return HIP_API_ID_hipMallocMipmappedArray;
-  if (strcmp("hipSetupArgument", name) == 0) return HIP_API_ID_hipSetupArgument;
-  if (strcmp("hipIpcGetEventHandle", name) == 0) return HIP_API_ID_hipIpcGetEventHandle;
-  if (strcmp("hipFreeArray", name) == 0) return HIP_API_ID_hipFreeArray;
-  if (strcmp("hipCtxSetCacheConfig", name) == 0) return HIP_API_ID_hipCtxSetCacheConfig;
-  if (strcmp("hipFuncSetCacheConfig", name) == 0) return HIP_API_ID_hipFuncSetCacheConfig;
-  if (strcmp("hipLaunchKernel", name) == 0) return HIP_API_ID_hipLaunchKernel;
-  if (strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
-  if (strcmp("hipModuleGetTexRef", name) == 0) return HIP_API_ID_hipModuleGetTexRef;
-  if (strcmp("hipFuncSetAttribute", name) == 0) return HIP_API_ID_hipFuncSetAttribute;
-  if (strcmp("hipEventElapsedTime", name) == 0) return HIP_API_ID_hipEventElapsedTime;
-  if (strcmp("hipConfigureCall", name) == 0) return HIP_API_ID_hipConfigureCall;
-  if (strcmp("hipMemAdvise", name) == 0) return HIP_API_ID_hipMemAdvise;
-  if (strcmp("hipMemcpy3DAsync", name) == 0) return HIP_API_ID_hipMemcpy3DAsync;
-  if (strcmp("hipEventDestroy", name) == 0) return HIP_API_ID_hipEventDestroy;
-  if (strcmp("hipCtxPopCurrent", name) == 0) return HIP_API_ID_hipCtxPopCurrent;
-  if (strcmp("hipGetSymbolAddress", name) == 0) return HIP_API_ID_hipGetSymbolAddress;
-  if (strcmp("hipHostGetFlags", name) == 0) return HIP_API_ID_hipHostGetFlags;
-  if (strcmp("hipHostMalloc", name) == 0) return HIP_API_ID_hipHostMalloc;
-  if (strcmp("hipCtxSetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxSetSharedMemConfig;
-  if (strcmp("hipFreeMipmappedArray", name) == 0) return HIP_API_ID_hipFreeMipmappedArray;
-  if (strcmp("hipMemGetInfo", name) == 0) return HIP_API_ID_hipMemGetInfo;
-  if (strcmp("hipDeviceReset", name) == 0) return HIP_API_ID_hipDeviceReset;
-  if (strcmp("hipMemset", name) == 0) return HIP_API_ID_hipMemset;
-  if (strcmp("hipMemsetD8", name) == 0) return HIP_API_ID_hipMemsetD8;
-  if (strcmp("hipMemcpyParam2DAsync", name) == 0) return HIP_API_ID_hipMemcpyParam2DAsync;
-  if (strcmp("hipHostRegister", name) == 0) return HIP_API_ID_hipHostRegister;
-  if (strcmp("hipDriverGetVersion", name) == 0) return HIP_API_ID_hipDriverGetVersion;
-  if (strcmp("hipArray3DCreate", name) == 0) return HIP_API_ID_hipArray3DCreate;
-  if (strcmp("hipIpcOpenMemHandle", name) == 0) return HIP_API_ID_hipIpcOpenMemHandle;
-  if (strcmp("hipGetLastError", name) == 0) return HIP_API_ID_hipGetLastError;
-  if (strcmp("hipGetDeviceFlags", name) == 0) return HIP_API_ID_hipGetDeviceFlags;
-  if (strcmp("hipDeviceGetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceGetSharedMemConfig;
-  if (strcmp("hipDrvMemcpy3D", name) == 0) return HIP_API_ID_hipDrvMemcpy3D;
-  if (strcmp("hipMemcpy2DFromArray", name) == 0) return HIP_API_ID_hipMemcpy2DFromArray;
-  if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
-  if (strcmp("hipSetDeviceFlags", name) == 0) return HIP_API_ID_hipSetDeviceFlags;
-  if (strcmp("hipHccModuleLaunchKernel", name) == 0) return HIP_API_ID_hipHccModuleLaunchKernel;
-  if (strcmp("hipFree", name) == 0) return HIP_API_ID_hipFree;
-  if (strcmp("hipOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipOccupancyMaxPotentialBlockSize;
-  if (strcmp("hipDeviceGetAttribute", name) == 0) return HIP_API_ID_hipDeviceGetAttribute;
-  if (strcmp("hipDeviceComputeCapability", name) == 0) return HIP_API_ID_hipDeviceComputeCapability;
-  if (strcmp("hipCtxDisablePeerAccess", name) == 0) return HIP_API_ID_hipCtxDisablePeerAccess;
-  if (strcmp("hipMallocManaged", name) == 0) return HIP_API_ID_hipMallocManaged;
-  if (strcmp("hipDeviceGetByPCIBusId", name) == 0) return HIP_API_ID_hipDeviceGetByPCIBusId;
-  if (strcmp("hipIpcGetMemHandle", name) == 0) return HIP_API_ID_hipIpcGetMemHandle;
-  if (strcmp("hipMemcpyHtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyHtoDAsync;
-  if (strcmp("hipCtxGetDevice", name) == 0) return HIP_API_ID_hipCtxGetDevice;
-  if (strcmp("hipMemcpyDtoD", name) == 0) return HIP_API_ID_hipMemcpyDtoD;
-  if (strcmp("hipModuleLoadData", name) == 0) return HIP_API_ID_hipModuleLoadData;
-  if (strcmp("hipDevicePrimaryCtxRelease", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRelease;
-  if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor;
-  if (strcmp("hipCtxSetCurrent", name) == 0) return HIP_API_ID_hipCtxSetCurrent;
-  if (strcmp("hipGetErrorString", name) == 0) return HIP_API_ID_hipGetErrorString;
-  if (strcmp("hipStreamCreate", name) == 0) return HIP_API_ID_hipStreamCreate;
-  if (strcmp("hipDevicePrimaryCtxRetain", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRetain;
-  if (strcmp("hipDeviceGet", name) == 0) return HIP_API_ID_hipDeviceGet;
-  if (strcmp("hipStreamCreateWithFlags", name) == 0) return HIP_API_ID_hipStreamCreateWithFlags;
-  if (strcmp("hipMemcpyFromArray", name) == 0) return HIP_API_ID_hipMemcpyFromArray;
-  if (strcmp("hipMemcpy2DAsync", name) == 0) return HIP_API_ID_hipMemcpy2DAsync;
-  if (strcmp("hipFuncGetAttributes", name) == 0) return HIP_API_ID_hipFuncGetAttributes;
-  if (strcmp("hipGetSymbolSize", name) == 0) return HIP_API_ID_hipGetSymbolSize;
-  if (strcmp("hipHostFree", name) == 0) return HIP_API_ID_hipHostFree;
-  if (strcmp("hipEventCreateWithFlags", name) == 0) return HIP_API_ID_hipEventCreateWithFlags;
-  if (strcmp("hipStreamQuery", name) == 0) return HIP_API_ID_hipStreamQuery;
-  if (strcmp("hipMemcpy3D", name) == 0) return HIP_API_ID_hipMemcpy3D;
-  if (strcmp("hipMemcpyToSymbol", name) == 0) return HIP_API_ID_hipMemcpyToSymbol;
-  if (strcmp("hipMemcpy", name) == 0) return HIP_API_ID_hipMemcpy;
-  if (strcmp("hipPeekAtLastError", name) == 0) return HIP_API_ID_hipPeekAtLastError;
-  if (strcmp("hipExtLaunchMultiKernelMultiDevice", name) == 0) return HIP_API_ID_hipExtLaunchMultiKernelMultiDevice;
-  if (strcmp("hipHostAlloc", name) == 0) return HIP_API_ID_hipHostAlloc;
-  if (strcmp("hipStreamAddCallback", name) == 0) return HIP_API_ID_hipStreamAddCallback;
-  if (strcmp("hipMemcpyToArray", name) == 0) return HIP_API_ID_hipMemcpyToArray;
-  if (strcmp("hipMemsetD32", name) == 0) return HIP_API_ID_hipMemsetD32;
-  if (strcmp("hipExtModuleLaunchKernel", name) == 0) return HIP_API_ID_hipExtModuleLaunchKernel;
-  if (strcmp("hipDeviceSynchronize", name) == 0) return HIP_API_ID_hipDeviceSynchronize;
-  if (strcmp("hipDeviceGetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceGetCacheConfig;
-  if (strcmp("hipMalloc3D", name) == 0) return HIP_API_ID_hipMalloc3D;
-  if (strcmp("hipPointerGetAttributes", name) == 0) return HIP_API_ID_hipPointerGetAttributes;
-  if (strcmp("hipMemsetAsync", name) == 0) return HIP_API_ID_hipMemsetAsync;
-  if (strcmp("hipDeviceGetName", name) == 0) return HIP_API_ID_hipDeviceGetName;
-  if (strcmp("hipModuleOccupancyMaxPotentialBlockSizeWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags;
-  if (strcmp("hipCtxPushCurrent", name) == 0) return HIP_API_ID_hipCtxPushCurrent;
-  if (strcmp("hipMemcpyPeer", name) == 0) return HIP_API_ID_hipMemcpyPeer;
-  if (strcmp("hipEventSynchronize", name) == 0) return HIP_API_ID_hipEventSynchronize;
-  if (strcmp("hipMemcpyDtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoDAsync;
-  if (strcmp("hipProfilerStart", name) == 0) return HIP_API_ID_hipProfilerStart;
-  if (strcmp("hipExtMallocWithFlags", name) == 0) return HIP_API_ID_hipExtMallocWithFlags;
-  if (strcmp("hipCtxEnablePeerAccess", name) == 0) return HIP_API_ID_hipCtxEnablePeerAccess;
-  if (strcmp("hipMemAllocHost", name) == 0) return HIP_API_ID_hipMemAllocHost;
-  if (strcmp("hipMemcpyDtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoHAsync;
-  if (strcmp("hipModuleLaunchKernel", name) == 0) return HIP_API_ID_hipModuleLaunchKernel;
-  if (strcmp("hipMemAllocPitch", name) == 0) return HIP_API_ID_hipMemAllocPitch;
-  if (strcmp("hipExtLaunchKernel", name) == 0) return HIP_API_ID_hipExtLaunchKernel;
-  if (strcmp("hipMemcpy2DFromArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DFromArrayAsync;
-  if (strcmp("hipDeviceGetLimit", name) == 0) return HIP_API_ID_hipDeviceGetLimit;
-  if (strcmp("hipModuleLoadDataEx", name) == 0) return HIP_API_ID_hipModuleLoadDataEx;
-  if (strcmp("hipRuntimeGetVersion", name) == 0) return HIP_API_ID_hipRuntimeGetVersion;
-  if (strcmp("hipMemRangeGetAttribute", name) == 0) return HIP_API_ID_hipMemRangeGetAttribute;
-  if (strcmp("hipDeviceGetP2PAttribute", name) == 0) return HIP_API_ID_hipDeviceGetP2PAttribute;
-  if (strcmp("hipMemcpyPeerAsync", name) == 0) return HIP_API_ID_hipMemcpyPeerAsync;
-  if (strcmp("hipGetDeviceProperties", name) == 0) return HIP_API_ID_hipGetDeviceProperties;
-  if (strcmp("hipMemcpyDtoH", name) == 0) return HIP_API_ID_hipMemcpyDtoH;
-  if (strcmp("hipMemcpyWithStream", name) == 0) return HIP_API_ID_hipMemcpyWithStream;
-  if (strcmp("hipDeviceTotalMem", name) == 0) return HIP_API_ID_hipDeviceTotalMem;
-  if (strcmp("hipHostGetDevicePointer", name) == 0) return HIP_API_ID_hipHostGetDevicePointer;
-  if (strcmp("hipMemRangeGetAttributes", name) == 0) return HIP_API_ID_hipMemRangeGetAttributes;
-  if (strcmp("hipMemcpyParam2D", name) == 0) return HIP_API_ID_hipMemcpyParam2D;
-  if (strcmp("hipDevicePrimaryCtxReset", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxReset;
-  if (strcmp("hipGetMipmappedArrayLevel", name) == 0) return HIP_API_ID_hipGetMipmappedArrayLevel;
-  if (strcmp("hipMemsetD32Async", name) == 0) return HIP_API_ID_hipMemsetD32Async;
-  if (strcmp("hipGetDevice", name) == 0) return HIP_API_ID_hipGetDevice;
-  if (strcmp("hipGetDeviceCount", name) == 0) return HIP_API_ID_hipGetDeviceCount;
-  if (strcmp("hipIpcOpenEventHandle", name) == 0) return HIP_API_ID_hipIpcOpenEventHandle;
-  return HIP_API_ID_NUMBER;
-}
-
-// HIP API callbacks data structure
-typedef struct hip_api_data_s {
-  uint64_t correlation_id;
-  uint32_t phase;
-  union {
-    struct {
-      const HIP_MEMCPY3D* pCopy;
-      HIP_MEMCPY3D pCopy__val;
-      hipStream_t stream;
-    } hipDrvMemcpy3DAsync;
-    struct {
-      int peerDeviceId;
-      unsigned int flags;
-    } hipDeviceEnablePeerAccess;
-    struct {
-      const void* func;
-      hipSharedMemConfig config;
-    } hipFuncSetSharedMemConfig;
-    struct {
-      const void* symbol;
-      const void* src;
-      size_t sizeBytes;
-      size_t offset;
-      hipMemcpyKind kind;
-      hipStream_t stream;
-    } hipMemcpyToSymbolAsync;
-    struct {
-      void** ptr;
-      void* ptr__val;
-      size_t* pitch;
-      size_t pitch__val;
-      size_t width;
-      size_t height;
-    } hipMallocPitch;
-    struct {
-      void** ptr;
-      void* ptr__val;
-      size_t size;
-    } hipMalloc;
-    struct {
-      hipDeviceptr_t dest;
-      unsigned short value;
-      size_t count;
-    } hipMemsetD16;
-    struct {
-      hipStream_t stream;
-      unsigned int cuMaskSize;
-      unsigned int* cuMask;
-      unsigned int cuMask__val;
-    } hipExtStreamGetCUMask;
-    struct {
-      hipEvent_t event;
-      hipStream_t stream;
-    } hipEventRecord;
-    struct {
-      int deviceId;
-    } hipSetDevice;
-    struct {
-      hipCtx_t ctx;
-      int* apiVersion;
-      int apiVersion__val;
-    } hipCtxGetApiVersion;
-    struct {
-      void* dst;
-      const void* symbol;
-      size_t sizeBytes;
-      size_t offset;
-      hipMemcpyKind kind;
-      hipStream_t stream;
-    } hipMemcpyFromSymbolAsync;
-    struct {
-      int device1;
-      int device2;
-      unsigned int* linktype;
-      unsigned int linktype__val;
-      unsigned int* hopcount;
-      unsigned int hopcount__val;
-    } hipExtGetLinkTypeAndHopCount;
-    struct {
-      dim3* gridDim;
-      dim3 gridDim__val;
-      dim3* blockDim;
-      dim3 blockDim__val;
-      size_t* sharedMem;
-      size_t sharedMem__val;
-      hipStream_t* stream;
-      hipStream_t stream__val;
-    } __hipPopCallConfiguration;
-    struct {
-      int* numBlocks;
-      int numBlocks__val;
-      hipFunction_t f;
-      int blockSize;
-      size_t dynSharedMemPerBlk;
-    } hipModuleOccupancyMaxActiveBlocksPerMultiprocessor;
-    struct {
-      hipPitchedPtr pitchedDevPtr;
-      int value;
-      hipExtent extent;
-    } hipMemset3D;
-    struct {
-      hipStream_t* stream;
-      hipStream_t stream__val;
-      unsigned int flags;
-      int priority;
-    } hipStreamCreateWithPriority;
-    struct {
-      hipArray* dst;
-      hipArray dst__val;
-      size_t wOffset;
-      size_t hOffset;
-      const void* src;
-      size_t spitch;
-      size_t width;
-      size_t height;
-      hipMemcpyKind kind;
-    } hipMemcpy2DToArray;
-    struct {
-      hipDeviceptr_t dest;
-      unsigned char value;
-      size_t count;
-      hipStream_t stream;
-    } hipMemsetD8Async;
-    struct {
-      hipFuncCache_t* cacheConfig;
-      hipFuncCache_t cacheConfig__val;
-    } hipCtxGetCacheConfig;
-    struct {
-      hipFunction_t* function;
-      hipFunction_t function__val;
-      hipModule_t module;
-      const char* kname;
-      char kname__val;
-    } hipModuleGetFunction;
-    struct {
-      hipStream_t stream;
-      hipEvent_t event;
-      unsigned int flags;
-    } hipStreamWaitEvent;
-    struct {
-      int* leastPriority;
-      int leastPriority__val;
-      int* greatestPriority;
-      int greatestPriority__val;
-    } hipDeviceGetStreamPriorityRange;
-    struct {
-      hipModule_t* module;
-      hipModule_t module__val;
-      const char* fname;
-      char fname__val;
-    } hipModuleLoad;
-    struct {
-      hipDevice_t dev;
-      unsigned int flags;
-    } hipDevicePrimaryCtxSetFlags;
-    struct {
-      const void* f;
-      dim3 gridDim;
-      dim3 blockDimX;
-      void** kernelParams;
-      void* kernelParams__val;
-      unsigned int sharedMemBytes;
-      hipStream_t stream;
-    } hipLaunchCooperativeKernel;
-    struct {
-      hipLaunchParams* launchParamsList;
-      hipLaunchParams launchParamsList__val;
-      int numDevices;
-      unsigned int flags;
-    } hipLaunchCooperativeKernelMultiDevice;
-    struct {
-      void* dst;
-      const void* src;
-      size_t sizeBytes;
-      hipMemcpyKind kind;
-      hipStream_t stream;
-    } hipMemcpyAsync;
-    struct {
-      hipArray_t* array;
-      hipArray_t array__val;
-      const hipChannelFormatDesc* desc;
-      hipChannelFormatDesc desc__val;
-      hipExtent extent;
-      unsigned int flags;
-    } hipMalloc3DArray;
-    struct {
-      void** ptr;
-      void* ptr__val;
-      size_t size;
-    } hipMallocHost;
-    struct {
-      hipCtx_t* ctx;
-      hipCtx_t ctx__val;
-    } hipCtxGetCurrent;
-    struct {
-      hipDevice_t dev;
-      unsigned int* flags;
-      unsigned int flags__val;
-      int* active;
-      int active__val;
-    } hipDevicePrimaryCtxGetState;
-    struct {
-      hipEvent_t event;
-    } hipEventQuery;
-    struct {
-      hipEvent_t* event;
-      hipEvent_t event__val;
-    } hipEventCreate;
-    struct {
-      hipDeviceptr_t* pbase;
-      hipDeviceptr_t pbase__val;
-      size_t* psize;
-      size_t psize__val;
-      hipDeviceptr_t dptr;
-    } hipMemGetAddressRange;
-    struct {
-      void* dst;
-      const void* symbol;
-      size_t sizeBytes;
-      size_t offset;
-      hipMemcpyKind kind;
-    } hipMemcpyFromSymbol;
-    struct {
-      hipArray** pHandle;
-      hipArray* pHandle__val;
-      const HIP_ARRAY_DESCRIPTOR* pAllocateArray;
-      HIP_ARRAY_DESCRIPTOR pAllocateArray__val;
-    } hipArrayCreate;
-    struct {
-      hipStream_t stream;
-      hipDeviceptr_t* dev_ptr;
-      hipDeviceptr_t dev_ptr__val;
-      size_t length;
-      unsigned int flags;
-    } hipStreamAttachMemAsync;
-    struct {
-      hipStream_t stream;
-      unsigned int* flags;
-      unsigned int flags__val;
-    } hipStreamGetFlags;
-    struct {
-      hipArray** array;
-      hipArray* array__val;
-      const hipChannelFormatDesc* desc;
-      hipChannelFormatDesc desc__val;
-      size_t width;
-      size_t height;
-      unsigned int flags;
-    } hipMallocArray;
-    struct {
-      hipSharedMemConfig* pConfig;
-      hipSharedMemConfig pConfig__val;
-    } hipCtxGetSharedMemConfig;
-    struct {
-      int peerDeviceId;
-    } hipDeviceDisablePeerAccess;
-    struct {
-      int* gridSize;
-      int gridSize__val;
-      int* blockSize;
-      int blockSize__val;
-      hipFunction_t f;
-      size_t dynSharedMemPerBlk;
-      int blockSizeLimit;
-    } hipModuleOccupancyMaxPotentialBlockSize;
-    struct {
-      void* ptr;
-      size_t* size;
-      size_t size__val;
-    } hipMemPtrGetInfo;
-    struct {
-      int* value;
-      int value__val;
-      hipFunction_attribute attrib;
-      hipFunction_t hfunc;
-    } hipFuncGetAttribute;
-    struct {
-      unsigned int* flags;
-      unsigned int flags__val;
-    } hipCtxGetFlags;
-    struct {
-      hipStream_t stream;
-    } hipStreamDestroy;
-    struct {
-      dim3 gridDim;
-      dim3 blockDim;
-      size_t sharedMem;
-      hipStream_t stream;
-    } __hipPushCallConfiguration;
-    struct {
-      hipPitchedPtr pitchedDevPtr;
-      int value;
-      hipExtent extent;
-      hipStream_t stream;
-    } hipMemset3DAsync;
-    struct {
-      char* pciBusId;
-      char pciBusId__val;
-      int len;
-      int device;
-    } hipDeviceGetPCIBusId;
-    struct {
-      unsigned int flags;
-    } hipInit;
-    struct {
-      void* dst;
-      hipArray* srcArray;
-      hipArray srcArray__val;
-      size_t srcOffset;
-      size_t count;
-    } hipMemcpyAtoH;
-    struct {
-      hipStream_t stream;
-      int* priority;
-      int priority__val;
-    } hipStreamGetPriority;
-    struct {
-      void* dst;
-      size_t pitch;
-      int value;
-      size_t width;
-      size_t height;
-    } hipMemset2D;
-    struct {
-      void* dst;
-      size_t pitch;
-      int value;
-      size_t width;
-      size_t height;
-      hipStream_t stream;
-    } hipMemset2DAsync;
-    struct {
-      int* canAccessPeer;
-      int canAccessPeer__val;
-      int deviceId;
-      int peerDeviceId;
-    } hipDeviceCanAccessPeer;
-    struct {
-      const void* hostFunction;
-    } hipLaunchByPtr;
-    struct {
-      const void* dev_ptr;
-      size_t count;
-      int device;
-      hipStream_t stream;
-    } hipMemPrefetchAsync;
-    struct {
-      hipCtx_t ctx;
-    } hipCtxDestroy;
-    struct {
-      hipDeviceptr_t dest;
-      unsigned short value;
-      size_t count;
-      hipStream_t stream;
-    } hipMemsetD16Async;
-    struct {
-      hipModule_t module;
-    } hipModuleUnload;
-    struct {
-      void* hostPtr;
-    } hipHostUnregister;
-    struct {
-      hipStream_t* stream;
-      hipStream_t stream__val;
-      unsigned int cuMaskSize;
-      const unsigned int* cuMask;
-      unsigned int cuMask__val;
-    } hipExtStreamCreateWithCUMask;
-    struct {
-      hipStream_t stream;
-    } hipStreamSynchronize;
-    struct {
-      void* ptr;
-    } hipFreeHost;
-    struct {
-      hipFuncCache_t cacheConfig;
-    } hipDeviceSetCacheConfig;
-    struct {
-      hipDeviceptr_t dst;
-      void* src;
-      size_t sizeBytes;
-    } hipMemcpyHtoD;
-    struct {
-      hipDeviceptr_t* dptr;
-      hipDeviceptr_t dptr__val;
-      size_t* bytes;
-      size_t bytes__val;
-      hipModule_t hmod;
-      const char* name;
-      char name__val;
-    } hipModuleGetGlobal;
-    struct {
-      hipArray* dstArray;
-      hipArray dstArray__val;
-      size_t dstOffset;
-      const void* srcHost;
-      size_t count;
-    } hipMemcpyHtoA;
-    struct {
-      hipCtx_t* ctx;
-      hipCtx_t ctx__val;
-      unsigned int flags;
-      hipDevice_t device;
-    } hipCtxCreate;
-    struct {
-      void* dst;
-      size_t dpitch;
-      const void* src;
-      size_t spitch;
-      size_t width;
-      size_t height;
-      hipMemcpyKind kind;
-    } hipMemcpy2D;
-    struct {
-      void* devPtr;
-    } hipIpcCloseMemHandle;
-    struct {
-      int* device;
-      int device__val;
-      const hipDeviceProp_t* prop;
-      hipDeviceProp_t prop__val;
-    } hipChooseDevice;
-    struct {
-      hipSharedMemConfig config;
-    } hipDeviceSetSharedMemConfig;
-    struct {
-      hipMipmappedArray_t* mipmappedArray;
-      hipMipmappedArray_t mipmappedArray__val;
-      const hipChannelFormatDesc* desc;
-      hipChannelFormatDesc desc__val;
-      hipExtent extent;
-      unsigned int numLevels;
-      unsigned int flags;
-    } hipMallocMipmappedArray;
-    struct {
-      const void* arg;
-      size_t size;
-      size_t offset;
-    } hipSetupArgument;
-    struct {
-      hipIpcEventHandle_t* handle;
-      hipIpcEventHandle_t handle__val;
-      hipEvent_t event;
-    } hipIpcGetEventHandle;
-    struct {
-      hipArray* array;
-      hipArray array__val;
-    } hipFreeArray;
-    struct {
-      hipFuncCache_t cacheConfig;
-    } hipCtxSetCacheConfig;
-    struct {
-      const void* func;
-      hipFuncCache_t config;
-    } hipFuncSetCacheConfig;
-    struct {
-      const void* function_address;
-      dim3 numBlocks;
-      dim3 dimBlocks;
-      void** args;
-      void* args__val;
-      size_t sharedMemBytes;
-      hipStream_t stream;
-    } hipLaunchKernel;
-    struct {
-      int* numBlocks;
-      int numBlocks__val;
-      hipFunction_t f;
-      int blockSize;
-      size_t dynSharedMemPerBlk;
-      unsigned int flags;
-    } hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
-    struct {
-      textureReference** texRef;
-      textureReference* texRef__val;
-      hipModule_t hmod;
-      const char* name;
-      char name__val;
-    } hipModuleGetTexRef;
-    struct {
-      const void* func;
-      hipFuncAttribute attr;
-      int value;
-    } hipFuncSetAttribute;
-    struct {
-      float* ms;
-      float ms__val;
-      hipEvent_t start;
-      hipEvent_t stop;
-    } hipEventElapsedTime;
-    struct {
-      dim3 gridDim;
-      dim3 blockDim;
-      size_t sharedMem;
-      hipStream_t stream;
-    } hipConfigureCall;
-    struct {
-      const void* dev_ptr;
-      size_t count;
-      hipMemoryAdvise advice;
-      int device;
-    } hipMemAdvise;
-    struct {
-      const hipMemcpy3DParms* p;
-      hipMemcpy3DParms p__val;
-      hipStream_t stream;
-    } hipMemcpy3DAsync;
-    struct {
-      hipEvent_t event;
-    } hipEventDestroy;
-    struct {
-      hipCtx_t* ctx;
-      hipCtx_t ctx__val;
-    } hipCtxPopCurrent;
-    struct {
-      void** devPtr;
-      void* devPtr__val;
-      const void* symbol;
-    } hipGetSymbolAddress;
-    struct {
-      unsigned int* flagsPtr;
-      unsigned int flagsPtr__val;
-      void* hostPtr;
-    } hipHostGetFlags;
-    struct {
-      void** ptr;
-      void* ptr__val;
-      size_t size;
-      unsigned int flags;
-    } hipHostMalloc;
-    struct {
-      hipSharedMemConfig config;
-    } hipCtxSetSharedMemConfig;
-    struct {
-      hipMipmappedArray_t mipmappedArray;
-    } hipFreeMipmappedArray;
-    struct {
-      size_t* free;
-      size_t free__val;
-      size_t* total;
-      size_t total__val;
-    } hipMemGetInfo;
-    struct {
-      void* dst;
-      int value;
-      size_t sizeBytes;
-    } hipMemset;
-    struct {
-      hipDeviceptr_t dest;
-      unsigned char value;
-      size_t count;
-    } hipMemsetD8;
-    struct {
-      const hip_Memcpy2D* pCopy;
-      hip_Memcpy2D pCopy__val;
-      hipStream_t stream;
-    } hipMemcpyParam2DAsync;
-    struct {
-      void* hostPtr;
-      size_t sizeBytes;
-      unsigned int flags;
-    } hipHostRegister;
-    struct {
-      int* driverVersion;
-      int driverVersion__val;
-    } hipDriverGetVersion;
-    struct {
-      hipArray** array;
-      hipArray* array__val;
-      const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray;
-      HIP_ARRAY3D_DESCRIPTOR pAllocateArray__val;
-    } hipArray3DCreate;
-    struct {
-      void** devPtr;
-      void* devPtr__val;
-      hipIpcMemHandle_t handle;
-      unsigned int flags;
-    } hipIpcOpenMemHandle;
-    struct {
-      unsigned int* flags;
-      unsigned int flags__val;
-    } hipGetDeviceFlags;
-    struct {
-      hipSharedMemConfig* pConfig;
-      hipSharedMemConfig pConfig__val;
-    } hipDeviceGetSharedMemConfig;
-    struct {
-      const HIP_MEMCPY3D* pCopy;
-      HIP_MEMCPY3D pCopy__val;
-    } hipDrvMemcpy3D;
-    struct {
-      void* dst;
-      size_t dpitch;
-      hipArray_const_t src;
-      size_t wOffset;
-      size_t hOffset;
-      size_t width;
-      size_t height;
-      hipMemcpyKind kind;
-    } hipMemcpy2DFromArray;
-    struct {
-      int* numBlocks;
-      int numBlocks__val;
-      const void* f;
-      int blockSize;
-      size_t dynamicSMemSize;
-      unsigned int flags;
-    } hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
-    struct {
-      unsigned int flags;
-    } hipSetDeviceFlags;
-    struct {
-      hipFunction_t f;
-      unsigned int globalWorkSizeX;
-      unsigned int globalWorkSizeY;
-      unsigned int globalWorkSizeZ;
-      unsigned int blockDimX;
-      unsigned int blockDimY;
-      unsigned int blockDimZ;
-      size_t sharedMemBytes;
-      hipStream_t hStream;
-      void** kernelParams;
-      void* kernelParams__val;
-      void** extra;
-      void* extra__val;
-      hipEvent_t startEvent;
-      hipEvent_t stopEvent;
-    } hipHccModuleLaunchKernel;
-    struct {
-      void* ptr;
-    } hipFree;
-    struct {
-      int* gridSize;
-      int gridSize__val;
-      int* blockSize;
-      int blockSize__val;
-      const void* f;
-      size_t dynSharedMemPerBlk;
-      int blockSizeLimit;
-    } hipOccupancyMaxPotentialBlockSize;
-    struct {
-      int* pi;
-      int pi__val;
-      hipDeviceAttribute_t attr;
-      int deviceId;
-    } hipDeviceGetAttribute;
-    struct {
-      int* major;
-      int major__val;
-      int* minor;
-      int minor__val;
-      hipDevice_t device;
-    } hipDeviceComputeCapability;
-    struct {
-      hipCtx_t peerCtx;
-    } hipCtxDisablePeerAccess;
-    struct {
-      void** dev_ptr;
-      void* dev_ptr__val;
-      size_t size;
-      unsigned int flags;
-    } hipMallocManaged;
-    struct {
-      int* device;
-      int device__val;
-      const char* pciBusId;
-      char pciBusId__val;
-    } hipDeviceGetByPCIBusId;
-    struct {
-      hipIpcMemHandle_t* handle;
-      hipIpcMemHandle_t handle__val;
-      void* devPtr;
-    } hipIpcGetMemHandle;
-    struct {
-      hipDeviceptr_t dst;
-      void* src;
-      size_t sizeBytes;
-      hipStream_t stream;
-    } hipMemcpyHtoDAsync;
-    struct {
-      hipDevice_t* device;
-      hipDevice_t device__val;
-    } hipCtxGetDevice;
-    struct {
-      hipDeviceptr_t dst;
-      hipDeviceptr_t src;
-      size_t sizeBytes;
-    } hipMemcpyDtoD;
-    struct {
-      hipModule_t* module;
-      hipModule_t module__val;
-      const void* image;
-    } hipModuleLoadData;
-    struct {
-      hipDevice_t dev;
-    } hipDevicePrimaryCtxRelease;
-    struct {
-      int* numBlocks;
-      int numBlocks__val;
-      const void* f;
-      int blockSize;
-      size_t dynamicSMemSize;
-    } hipOccupancyMaxActiveBlocksPerMultiprocessor;
-    struct {
-      hipCtx_t ctx;
-    } hipCtxSetCurrent;
-    struct {
-      hipStream_t* stream;
-      hipStream_t stream__val;
-    } hipStreamCreate;
-    struct {
-      hipCtx_t* pctx;
-      hipCtx_t pctx__val;
-      hipDevice_t dev;
-    } hipDevicePrimaryCtxRetain;
-    struct {
-      hipDevice_t* device;
-      hipDevice_t device__val;
-      int ordinal;
-    } hipDeviceGet;
-    struct {
-      hipStream_t* stream;
-      hipStream_t stream__val;
-      unsigned int flags;
-    } hipStreamCreateWithFlags;
-    struct {
-      void* dst;
-      hipArray_const_t srcArray;
-      size_t wOffset;
-      size_t hOffset;
-      size_t count;
-      hipMemcpyKind kind;
-    } hipMemcpyFromArray;
-    struct {
-      void* dst;
-      size_t dpitch;
-      const void* src;
-      size_t spitch;
-      size_t width;
-      size_t height;
-      hipMemcpyKind kind;
-      hipStream_t stream;
-    } hipMemcpy2DAsync;
-    struct {
-      hipFuncAttributes* attr;
-      hipFuncAttributes attr__val;
-      const void* func;
-    } hipFuncGetAttributes;
-    struct {
-      size_t* size;
-      size_t size__val;
-      const void* symbol;
-    } hipGetSymbolSize;
-    struct {
-      void* ptr;
-    } hipHostFree;
-    struct {
-      hipEvent_t* event;
-      hipEvent_t event__val;
-      unsigned int flags;
-    } hipEventCreateWithFlags;
-    struct {
-      hipStream_t stream;
-    } hipStreamQuery;
-    struct {
-      const hipMemcpy3DParms* p;
-      hipMemcpy3DParms p__val;
-    } hipMemcpy3D;
-    struct {
-      const void* symbol;
-      const void* src;
-      size_t sizeBytes;
-      size_t offset;
-      hipMemcpyKind kind;
-    } hipMemcpyToSymbol;
-    struct {
-      void* dst;
-      const void* src;
-      size_t sizeBytes;
-      hipMemcpyKind kind;
-    } hipMemcpy;
-    struct {
-      hipLaunchParams* launchParamsList;
-      hipLaunchParams launchParamsList__val;
-      int numDevices;
-      unsigned int flags;
-    } hipExtLaunchMultiKernelMultiDevice;
-    struct {
-      void** ptr;
-      void* ptr__val;
-      size_t size;
-      unsigned int flags;
-    } hipHostAlloc;
-    struct {
-      hipStream_t stream;
-      hipStreamCallback_t callback;
-      void* userData;
-      unsigned int flags;
-    } hipStreamAddCallback;
-    struct {
-      hipArray* dst;
-      hipArray dst__val;
-      size_t wOffset;
-      size_t hOffset;
-      const void* src;
-      size_t count;
-      hipMemcpyKind kind;
-    } hipMemcpyToArray;
-    struct {
-      hipDeviceptr_t dest;
-      int value;
-      size_t count;
-    } hipMemsetD32;
-    struct {
-      hipFunction_t f;
-      unsigned int globalWorkSizeX;
-      unsigned int globalWorkSizeY;
-      unsigned int globalWorkSizeZ;
-      unsigned int localWorkSizeX;
-      unsigned int localWorkSizeY;
-      unsigned int localWorkSizeZ;
-      size_t sharedMemBytes;
-      hipStream_t hStream;
-      void** kernelParams;
-      void* kernelParams__val;
-      void** extra;
-      void* extra__val;
-      hipEvent_t startEvent;
-      hipEvent_t stopEvent;
-      unsigned int flags;
-    } hipExtModuleLaunchKernel;
-    struct {
-      hipFuncCache_t* cacheConfig;
-      hipFuncCache_t cacheConfig__val;
-    } hipDeviceGetCacheConfig;
-    struct {
-      hipPitchedPtr* pitchedDevPtr;
-      hipPitchedPtr pitchedDevPtr__val;
-      hipExtent extent;
-    } hipMalloc3D;
-    struct {
-      hipPointerAttribute_t* attributes;
-      hipPointerAttribute_t attributes__val;
-      const void* ptr;
-    } hipPointerGetAttributes;
-    struct {
-      void* dst;
-      int value;
-      size_t sizeBytes;
-      hipStream_t stream;
-    } hipMemsetAsync;
-    struct {
-      char* name;
-      char name__val;
-      int len;
-      hipDevice_t device;
-    } hipDeviceGetName;
-    struct {
-      int* gridSize;
-      int gridSize__val;
-      int* blockSize;
-      int blockSize__val;
-      hipFunction_t f;
-      size_t dynSharedMemPerBlk;
-      int blockSizeLimit;
-      unsigned int flags;
-    } hipModuleOccupancyMaxPotentialBlockSizeWithFlags;
-    struct {
-      hipCtx_t ctx;
-    } hipCtxPushCurrent;
-    struct {
-      void* dst;
-      int dstDeviceId;
-      const void* src;
-      int srcDeviceId;
-      size_t sizeBytes;
-    } hipMemcpyPeer;
-    struct {
-      hipEvent_t event;
-    } hipEventSynchronize;
-    struct {
-      hipDeviceptr_t dst;
-      hipDeviceptr_t src;
-      size_t sizeBytes;
-      hipStream_t stream;
-    } hipMemcpyDtoDAsync;
-    struct {
-      void** ptr;
-      void* ptr__val;
-      size_t sizeBytes;
-      unsigned int flags;
-    } hipExtMallocWithFlags;
-    struct {
-      hipCtx_t peerCtx;
-      unsigned int flags;
-    } hipCtxEnablePeerAccess;
-    struct {
-      void** ptr;
-      void* ptr__val;
-      size_t size;
-    } hipMemAllocHost;
-    struct {
-      void* dst;
-      hipDeviceptr_t src;
-      size_t sizeBytes;
-      hipStream_t stream;
-    } hipMemcpyDtoHAsync;
-    struct {
-      hipFunction_t f;
-      unsigned int gridDimX;
-      unsigned int gridDimY;
-      unsigned int gridDimZ;
-      unsigned int blockDimX;
-      unsigned int blockDimY;
-      unsigned int blockDimZ;
-      unsigned int sharedMemBytes;
-      hipStream_t stream;
-      void** kernelParams;
-      void* kernelParams__val;
-      void** extra;
-      void* extra__val;
-    } hipModuleLaunchKernel;
-    struct {
-      hipDeviceptr_t* dptr;
-      hipDeviceptr_t dptr__val;
-      size_t* pitch;
-      size_t pitch__val;
-      size_t widthInBytes;
-      size_t height;
-      unsigned int elementSizeBytes;
-    } hipMemAllocPitch;
-    struct {
-      const void* function_address;
-      dim3 numBlocks;
-      dim3 dimBlocks;
-      void** args;
-      void* args__val;
-      size_t sharedMemBytes;
-      hipStream_t stream;
-      hipEvent_t startEvent;
-      hipEvent_t stopEvent;
-      int flags;
-    } hipExtLaunchKernel;
-    struct {
-      void* dst;
-      size_t dpitch;
-      hipArray_const_t src;
-      size_t wOffset;
-      size_t hOffset;
-      size_t width;
-      size_t height;
-      hipMemcpyKind kind;
-      hipStream_t stream;
-    } hipMemcpy2DFromArrayAsync;
-    struct {
-      size_t* pValue;
-      size_t pValue__val;
-      enum hipLimit_t limit;
-    } hipDeviceGetLimit;
-    struct {
-      hipModule_t* module;
-      hipModule_t module__val;
-      const void* image;
-      unsigned int numOptions;
-      hipJitOption* options;
-      hipJitOption options__val;
-      void** optionsValues;
-      void* optionsValues__val;
-    } hipModuleLoadDataEx;
-    struct {
-      int* runtimeVersion;
-      int runtimeVersion__val;
-    } hipRuntimeGetVersion;
-    struct {
-      void* data;
-      size_t data_size;
-      hipMemRangeAttribute attribute;
-      const void* dev_ptr;
-      size_t count;
-    } hipMemRangeGetAttribute;
-    struct {
-      int* value;
-      int value__val;
-      hipDeviceP2PAttr attr;
-      int srcDevice;
-      int dstDevice;
-    } hipDeviceGetP2PAttribute;
-    struct {
-      void* dst;
-      int dstDeviceId;
-      const void* src;
-      int srcDevice;
-      size_t sizeBytes;
-      hipStream_t stream;
-    } hipMemcpyPeerAsync;
-    struct {
-      hipDeviceProp_t* props;
-      hipDeviceProp_t props__val;
-      hipDevice_t device;
-    } hipGetDeviceProperties;
-    struct {
-      void* dst;
-      hipDeviceptr_t src;
-      size_t sizeBytes;
-    } hipMemcpyDtoH;
-    struct {
-      void* dst;
-      const void* src;
-      size_t sizeBytes;
-      hipMemcpyKind kind;
-      hipStream_t stream;
-    } hipMemcpyWithStream;
-    struct {
-      size_t* bytes;
-      size_t bytes__val;
-      hipDevice_t device;
-    } hipDeviceTotalMem;
-    struct {
-      void** devPtr;
-      void* devPtr__val;
-      void* hstPtr;
-      unsigned int flags;
-    } hipHostGetDevicePointer;
-    struct {
-      void** data;
-      void* data__val;
-      size_t* data_sizes;
-      size_t data_sizes__val;
-      hipMemRangeAttribute* attributes;
-      hipMemRangeAttribute attributes__val;
-      size_t num_attributes;
-      const void* dev_ptr;
-      size_t count;
-    } hipMemRangeGetAttributes;
-    struct {
-      const hip_Memcpy2D* pCopy;
-      hip_Memcpy2D pCopy__val;
-    } hipMemcpyParam2D;
-    struct {
-      hipDevice_t dev;
-    } hipDevicePrimaryCtxReset;
-    struct {
-      hipArray_t* levelArray;
-      hipArray_t levelArray__val;
-      hipMipmappedArray_const_t mipmappedArray;
-      unsigned int level;
-    } hipGetMipmappedArrayLevel;
-    struct {
-      hipDeviceptr_t dst;
-      int value;
-      size_t count;
-      hipStream_t stream;
-    } hipMemsetD32Async;
-    struct {
-      int* deviceId;
-      int deviceId__val;
-    } hipGetDevice;
-    struct {
-      int* count;
-      int count__val;
-    } hipGetDeviceCount;
-    struct {
-      hipEvent_t* event;
-      hipEvent_t event__val;
-      hipIpcEventHandle_t handle;
-    } hipIpcOpenEventHandle;
-  } args;
-} hip_api_data_t;
-
-// HIP API callbacks args data filling macros
-// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')]
-#define INIT_hipDrvMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDrvMemcpy3DAsync.pCopy = (const HIP_MEMCPY3D*)pCopy; \
-  cb_data.args.hipDrvMemcpy3DAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')]
-#define INIT_hipDeviceEnablePeerAccess_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceEnablePeerAccess.peerDeviceId = (int)peerDeviceId; \
-  cb_data.args.hipDeviceEnablePeerAccess.flags = (unsigned int)flags; \
-};
-// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')]
-#define INIT_hipFuncSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFuncSetSharedMemConfig.func = (const void*)func; \
-  cb_data.args.hipFuncSetSharedMemConfig.config = (hipSharedMemConfig)config; \
-};
-// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyToSymbolAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyToSymbolAsync.symbol = (const void*)symbol; \
-  cb_data.args.hipMemcpyToSymbolAsync.src = (const void*)src; \
-  cb_data.args.hipMemcpyToSymbolAsync.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpyToSymbolAsync.offset = (size_t)offset; \
-  cb_data.args.hipMemcpyToSymbolAsync.kind = (hipMemcpyKind)kind; \
-  cb_data.args.hipMemcpyToSymbolAsync.stream = (hipStream_t)stream; \
-};
-// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')]
-#define INIT_hipMallocPitch_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMallocPitch.ptr = (void**)ptr; \
-  cb_data.args.hipMallocPitch.pitch = (size_t*)pitch; \
-  cb_data.args.hipMallocPitch.width = (size_t)width; \
-  cb_data.args.hipMallocPitch.height = (size_t)height; \
-};
-// hipMalloc[('void**', 'ptr'), ('size_t', 'size')]
-#define INIT_hipMalloc_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMalloc.ptr = (void**)ptr; \
-  cb_data.args.hipMalloc.size = (size_t)sizeBytes; \
-};
-// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')]
-#define INIT_hipMemsetD16_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemsetD16.dest = (hipDeviceptr_t)dst; \
-  cb_data.args.hipMemsetD16.value = (unsigned short)value; \
-  cb_data.args.hipMemsetD16.count = (size_t)count; \
-};
-// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')]
-#define INIT_hipExtStreamGetCUMask_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipExtStreamGetCUMask.stream = (hipStream_t)stream; \
-  cb_data.args.hipExtStreamGetCUMask.cuMaskSize = (unsigned int)cuMaskSize; \
-  cb_data.args.hipExtStreamGetCUMask.cuMask = (unsigned int*)cuMask; \
-};
-// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')]
-#define INIT_hipEventRecord_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipEventRecord.event = (hipEvent_t)event; \
-  cb_data.args.hipEventRecord.stream = (hipStream_t)stream; \
-};
-// hipCtxSynchronize[]
-#define INIT_hipCtxSynchronize_CB_ARGS_DATA(cb_data) { \
-};
-// hipSetDevice[('int', 'deviceId')]
-#define INIT_hipSetDevice_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipSetDevice.deviceId = (int)device; \
-};
-// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('int*', 'apiVersion')]
-#define INIT_hipCtxGetApiVersion_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxGetApiVersion.ctx = (hipCtx_t)ctx; \
-  cb_data.args.hipCtxGetApiVersion.apiVersion = (int*)apiVersion; \
-};
-// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyFromSymbolAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyFromSymbolAsync.dst = (void*)dst; \
-  cb_data.args.hipMemcpyFromSymbolAsync.symbol = (const void*)symbol; \
-  cb_data.args.hipMemcpyFromSymbolAsync.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpyFromSymbolAsync.offset = (size_t)offset; \
-  cb_data.args.hipMemcpyFromSymbolAsync.kind = (hipMemcpyKind)kind; \
-  cb_data.args.hipMemcpyFromSymbolAsync.stream = (hipStream_t)stream; \
-};
-// hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')]
-#define INIT_hipExtGetLinkTypeAndHopCount_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipExtGetLinkTypeAndHopCount.device1 = (int)device1; \
-  cb_data.args.hipExtGetLinkTypeAndHopCount.device2 = (int)device2; \
-  cb_data.args.hipExtGetLinkTypeAndHopCount.linktype = (unsigned int*)linktype; \
-  cb_data.args.hipExtGetLinkTypeAndHopCount.hopcount = (unsigned int*)hopcount; \
-};
-// __hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')]
-#define INIT___hipPopCallConfiguration_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.__hipPopCallConfiguration.gridDim = (dim3*)gridDim; \
-  cb_data.args.__hipPopCallConfiguration.blockDim = (dim3*)blockDim; \
-  cb_data.args.__hipPopCallConfiguration.sharedMem = (size_t*)sharedMem; \
-  cb_data.args.__hipPopCallConfiguration.stream = (hipStream_t*)stream; \
-};
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')]
-#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f = (hipFunction_t)f; \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
-};
-// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')]
-#define INIT_hipMemset3D_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemset3D.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \
-  cb_data.args.hipMemset3D.value = (int)value; \
-  cb_data.args.hipMemset3D.extent = (hipExtent)extent; \
-};
-// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')]
-#define INIT_hipStreamCreateWithPriority_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamCreateWithPriority.stream = (hipStream_t*)stream; \
-  cb_data.args.hipStreamCreateWithPriority.flags = (unsigned int)flags; \
-  cb_data.args.hipStreamCreateWithPriority.priority = (int)priority; \
-};
-// hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy2DToArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy2DToArray.dst = (hipArray*)dst; \
-  cb_data.args.hipMemcpy2DToArray.wOffset = (size_t)wOffset; \
-  cb_data.args.hipMemcpy2DToArray.hOffset = (size_t)hOffset; \
-  cb_data.args.hipMemcpy2DToArray.src = (const void*)src; \
-  cb_data.args.hipMemcpy2DToArray.spitch = (size_t)spitch; \
-  cb_data.args.hipMemcpy2DToArray.width = (size_t)width; \
-  cb_data.args.hipMemcpy2DToArray.height = (size_t)height; \
-  cb_data.args.hipMemcpy2DToArray.kind = (hipMemcpyKind)kind; \
-};
-// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetD8Async_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemsetD8Async.dest = (hipDeviceptr_t)dst; \
-  cb_data.args.hipMemsetD8Async.value = (unsigned char)value; \
-  cb_data.args.hipMemsetD8Async.count = (size_t)count; \
-  cb_data.args.hipMemsetD8Async.stream = (hipStream_t)stream; \
-};
-// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
-#define INIT_hipCtxGetCacheConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \
-};
-// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')]
-#define INIT_hipModuleGetFunction_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleGetFunction.function = (hipFunction_t*)hfunc; \
-  cb_data.args.hipModuleGetFunction.module = (hipModule_t)hmod; \
-  cb_data.args.hipModuleGetFunction.kname = (name) ? strdup(name) : NULL; \
-};
-// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')]
-#define INIT_hipStreamWaitEvent_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamWaitEvent.stream = (hipStream_t)stream; \
-  cb_data.args.hipStreamWaitEvent.event = (hipEvent_t)event; \
-  cb_data.args.hipStreamWaitEvent.flags = (unsigned int)flags; \
-};
-// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')]
-#define INIT_hipDeviceGetStreamPriorityRange_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetStreamPriorityRange.leastPriority = (int*)leastPriority; \
-  cb_data.args.hipDeviceGetStreamPriorityRange.greatestPriority = (int*)greatestPriority; \
-};
-// hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')]
-#define INIT_hipModuleLoad_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleLoad.module = (hipModule_t*)module; \
-  cb_data.args.hipModuleLoad.fname = (fname) ? strdup(fname) : NULL; \
-};
-// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')]
-#define INIT_hipDevicePrimaryCtxSetFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDevicePrimaryCtxSetFlags.dev = (hipDevice_t)dev; \
-  cb_data.args.hipDevicePrimaryCtxSetFlags.flags = (unsigned int)flags; \
-};
-// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipLaunchCooperativeKernel_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipLaunchCooperativeKernel.f = (const void*)f; \
-  cb_data.args.hipLaunchCooperativeKernel.gridDim = (dim3)gridDim; \
-  cb_data.args.hipLaunchCooperativeKernel.blockDimX = (dim3)blockDim; \
-  cb_data.args.hipLaunchCooperativeKernel.kernelParams = (void**)kernelParams; \
-  cb_data.args.hipLaunchCooperativeKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \
-  cb_data.args.hipLaunchCooperativeKernel.stream = (hipStream_t)hStream; \
-};
-// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
-#define INIT_hipLaunchCooperativeKernelMultiDevice_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipLaunchCooperativeKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \
-  cb_data.args.hipLaunchCooperativeKernelMultiDevice.numDevices = (int)numDevices; \
-  cb_data.args.hipLaunchCooperativeKernelMultiDevice.flags = (unsigned int)flags; \
-};
-// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyAsync.dst = (void*)dst; \
-  cb_data.args.hipMemcpyAsync.src = (const void*)src; \
-  cb_data.args.hipMemcpyAsync.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpyAsync.kind = (hipMemcpyKind)kind; \
-  cb_data.args.hipMemcpyAsync.stream = (hipStream_t)stream; \
-};
-// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')]
-#define INIT_hipMalloc3DArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMalloc3DArray.array = (hipArray_t*)array; \
-  cb_data.args.hipMalloc3DArray.desc = (const hipChannelFormatDesc*)desc; \
-  cb_data.args.hipMalloc3DArray.extent = (hipExtent)extent; \
-  cb_data.args.hipMalloc3DArray.flags = (unsigned int)flags; \
-};
-// hipMallocHost[('void**', 'ptr'), ('size_t', 'size')]
-#define INIT_hipMallocHost_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMallocHost.ptr = (void**)ptr; \
-  cb_data.args.hipMallocHost.size = (size_t)size; \
-};
-// hipCtxGetCurrent[('hipCtx_t*', 'ctx')]
-#define INIT_hipCtxGetCurrent_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxGetCurrent.ctx = (hipCtx_t*)ctx; \
-};
-// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')]
-#define INIT_hipDevicePrimaryCtxGetState_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDevicePrimaryCtxGetState.dev = (hipDevice_t)dev; \
-  cb_data.args.hipDevicePrimaryCtxGetState.flags = (unsigned int*)flags; \
-  cb_data.args.hipDevicePrimaryCtxGetState.active = (int*)active; \
-};
-// hipEventQuery[('hipEvent_t', 'event')]
-#define INIT_hipEventQuery_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipEventQuery.event = (hipEvent_t)event; \
-};
-// hipEventCreate[('hipEvent_t*', 'event')]
-#define INIT_hipEventCreate_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipEventCreate.event = (hipEvent_t*)event; \
-};
-// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')]
-#define INIT_hipMemGetAddressRange_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemGetAddressRange.pbase = (hipDeviceptr_t*)pbase; \
-  cb_data.args.hipMemGetAddressRange.psize = (size_t*)psize; \
-  cb_data.args.hipMemGetAddressRange.dptr = (hipDeviceptr_t)dptr; \
-};
-// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyFromSymbol_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyFromSymbol.dst = (void*)dst; \
-  cb_data.args.hipMemcpyFromSymbol.symbol = (const void*)symbol; \
-  cb_data.args.hipMemcpyFromSymbol.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpyFromSymbol.offset = (size_t)offset; \
-  cb_data.args.hipMemcpyFromSymbol.kind = (hipMemcpyKind)kind; \
-};
-// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')]
-#define INIT_hipArrayCreate_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipArrayCreate.pHandle = (hipArray**)array; \
-  cb_data.args.hipArrayCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \
-};
-// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('hipDeviceptr_t*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')]
-#define INIT_hipStreamAttachMemAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamAttachMemAsync.stream = (hipStream_t)stream; \
-  cb_data.args.hipStreamAttachMemAsync.dev_ptr = (hipDeviceptr_t*)dev_ptr; \
-  cb_data.args.hipStreamAttachMemAsync.length = (size_t)length; \
-  cb_data.args.hipStreamAttachMemAsync.flags = (unsigned int)flags; \
-};
-// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')]
-#define INIT_hipStreamGetFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamGetFlags.stream = (hipStream_t)stream; \
-  cb_data.args.hipStreamGetFlags.flags = (unsigned int*)flags; \
-};
-// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')]
-#define INIT_hipMallocArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMallocArray.array = (hipArray**)array; \
-  cb_data.args.hipMallocArray.desc = (const hipChannelFormatDesc*)desc; \
-  cb_data.args.hipMallocArray.width = (size_t)width; \
-  cb_data.args.hipMallocArray.height = (size_t)height; \
-  cb_data.args.hipMallocArray.flags = (unsigned int)flags; \
-};
-// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
-#define INIT_hipCtxGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \
-};
-// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')]
-#define INIT_hipDeviceDisablePeerAccess_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceDisablePeerAccess.peerDeviceId = (int)peerDeviceId; \
-};
-// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
-#define INIT_hipModuleOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.f = (hipFunction_t)f; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \
-};
-// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')]
-#define INIT_hipMemPtrGetInfo_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemPtrGetInfo.ptr = (void*)ptr; \
-  cb_data.args.hipMemPtrGetInfo.size = (size_t*)size; \
-};
-// hipFuncGetAttribute[('int*', 'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')]
-#define INIT_hipFuncGetAttribute_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFuncGetAttribute.value = (int*)value; \
-  cb_data.args.hipFuncGetAttribute.attrib = (hipFunction_attribute)attrib; \
-  cb_data.args.hipFuncGetAttribute.hfunc = (hipFunction_t)hfunc; \
-};
-// hipCtxGetFlags[('unsigned int*', 'flags')]
-#define INIT_hipCtxGetFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxGetFlags.flags = (unsigned int*)flags; \
-};
-// hipStreamDestroy[('hipStream_t', 'stream')]
-#define INIT_hipStreamDestroy_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamDestroy.stream = (hipStream_t)stream; \
-};
-// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
-#define INIT___hipPushCallConfiguration_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.__hipPushCallConfiguration.gridDim = (dim3)gridDim; \
-  cb_data.args.__hipPushCallConfiguration.blockDim = (dim3)blockDim; \
-  cb_data.args.__hipPushCallConfiguration.sharedMem = (size_t)sharedMem; \
-  cb_data.args.__hipPushCallConfiguration.stream = (hipStream_t)stream; \
-};
-// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')]
-#define INIT_hipMemset3DAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemset3DAsync.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \
-  cb_data.args.hipMemset3DAsync.value = (int)value; \
-  cb_data.args.hipMemset3DAsync.extent = (hipExtent)extent; \
-  cb_data.args.hipMemset3DAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')]
-#define INIT_hipDeviceGetPCIBusId_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetPCIBusId.pciBusId = (char*)pciBusId; \
-  cb_data.args.hipDeviceGetPCIBusId.len = (int)len; \
-  cb_data.args.hipDeviceGetPCIBusId.device = (int)device; \
-};
-// hipInit[('unsigned int', 'flags')]
-#define INIT_hipInit_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipInit.flags = (unsigned int)flags; \
-};
-// hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')]
-#define INIT_hipMemcpyAtoH_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyAtoH.dst = (void*)dstHost; \
-  cb_data.args.hipMemcpyAtoH.srcArray = (hipArray*)srcArray; \
-  cb_data.args.hipMemcpyAtoH.srcOffset = (size_t)srcOffset; \
-  cb_data.args.hipMemcpyAtoH.count = (size_t)ByteCount; \
-};
-// hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')]
-#define INIT_hipStreamGetPriority_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamGetPriority.stream = (hipStream_t)stream; \
-  cb_data.args.hipStreamGetPriority.priority = (int*)priority; \
-};
-// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')]
-#define INIT_hipMemset2D_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemset2D.dst = (void*)dst; \
-  cb_data.args.hipMemset2D.pitch = (size_t)pitch; \
-  cb_data.args.hipMemset2D.value = (int)value; \
-  cb_data.args.hipMemset2D.width = (size_t)width; \
-  cb_data.args.hipMemset2D.height = (size_t)height; \
-};
-// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')]
-#define INIT_hipMemset2DAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemset2DAsync.dst = (void*)dst; \
-  cb_data.args.hipMemset2DAsync.pitch = (size_t)pitch; \
-  cb_data.args.hipMemset2DAsync.value = (int)value; \
-  cb_data.args.hipMemset2DAsync.width = (size_t)width; \
-  cb_data.args.hipMemset2DAsync.height = (size_t)height; \
-  cb_data.args.hipMemset2DAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')]
-#define INIT_hipDeviceCanAccessPeer_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceCanAccessPeer.canAccessPeer = (int*)canAccess; \
-  cb_data.args.hipDeviceCanAccessPeer.deviceId = (int)deviceId; \
-  cb_data.args.hipDeviceCanAccessPeer.peerDeviceId = (int)peerDeviceId; \
-};
-// hipLaunchByPtr[('const void*', 'hostFunction')]
-#define INIT_hipLaunchByPtr_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipLaunchByPtr.hostFunction = (const void*)hostFunction; \
-};
-// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')]
-#define INIT_hipMemPrefetchAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemPrefetchAsync.dev_ptr = (const void*)dev_ptr; \
-  cb_data.args.hipMemPrefetchAsync.count = (size_t)count; \
-  cb_data.args.hipMemPrefetchAsync.device = (int)device; \
-  cb_data.args.hipMemPrefetchAsync.stream = (hipStream_t)stream; \
-};
-// hipCtxDestroy[('hipCtx_t', 'ctx')]
-#define INIT_hipCtxDestroy_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxDestroy.ctx = (hipCtx_t)ctx; \
-};
-// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetD16Async_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemsetD16Async.dest = (hipDeviceptr_t)dst; \
-  cb_data.args.hipMemsetD16Async.value = (unsigned short)value; \
-  cb_data.args.hipMemsetD16Async.count = (size_t)count; \
-  cb_data.args.hipMemsetD16Async.stream = (hipStream_t)stream; \
-};
-// hipModuleUnload[('hipModule_t', 'module')]
-#define INIT_hipModuleUnload_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleUnload.module = (hipModule_t)hmod; \
-};
-// hipHostUnregister[('void*', 'hostPtr')]
-#define INIT_hipHostUnregister_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHostUnregister.hostPtr = (void*)hostPtr; \
-};
-// hipProfilerStop[]
-#define INIT_hipProfilerStop_CB_ARGS_DATA(cb_data) { \
-};
-// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
-#define INIT_hipExtStreamCreateWithCUMask_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipExtStreamCreateWithCUMask.stream = (hipStream_t*)stream; \
-  cb_data.args.hipExtStreamCreateWithCUMask.cuMaskSize = (unsigned int)cuMaskSize; \
-  cb_data.args.hipExtStreamCreateWithCUMask.cuMask = (const unsigned int*)cuMask; \
-};
-// hipStreamSynchronize[('hipStream_t', 'stream')]
-#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \
-};
-// hipFreeHost[('void*', 'ptr')]
-#define INIT_hipFreeHost_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFreeHost.ptr = (void*)ptr; \
-};
-// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
-#define INIT_hipDeviceSetCacheConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \
-};
-// hipGetErrorName[]
-#define INIT_hipGetErrorName_CB_ARGS_DATA(cb_data) { \
-};
-// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyHtoD_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyHtoD.dst = (hipDeviceptr_t)dstDevice; \
-  cb_data.args.hipMemcpyHtoD.src = (void*)srcHost; \
-  cb_data.args.hipMemcpyHtoD.sizeBytes = (size_t)ByteCount; \
-};
-// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
-#define INIT_hipModuleGetGlobal_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleGetGlobal.dptr = (hipDeviceptr_t*)dptr; \
-  cb_data.args.hipModuleGetGlobal.bytes = (size_t*)bytes; \
-  cb_data.args.hipModuleGetGlobal.hmod = (hipModule_t)hmod; \
-  cb_data.args.hipModuleGetGlobal.name = (name) ? strdup(name) : NULL; \
-};
-// hipMemcpyHtoA[('hipArray*', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')]
-#define INIT_hipMemcpyHtoA_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyHtoA.dstArray = (hipArray*)dstArray; \
-  cb_data.args.hipMemcpyHtoA.dstOffset = (size_t)dstOffset; \
-  cb_data.args.hipMemcpyHtoA.srcHost = (const void*)srcHost; \
-  cb_data.args.hipMemcpyHtoA.count = (size_t)ByteCount; \
-};
-// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')]
-#define INIT_hipCtxCreate_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxCreate.ctx = (hipCtx_t*)ctx; \
-  cb_data.args.hipCtxCreate.flags = (unsigned int)flags; \
-  cb_data.args.hipCtxCreate.device = (hipDevice_t)device; \
-};
-// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy2D_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy2D.dst = (void*)dst; \
-  cb_data.args.hipMemcpy2D.dpitch = (size_t)dpitch; \
-  cb_data.args.hipMemcpy2D.src = (const void*)src; \
-  cb_data.args.hipMemcpy2D.spitch = (size_t)spitch; \
-  cb_data.args.hipMemcpy2D.width = (size_t)width; \
-  cb_data.args.hipMemcpy2D.height = (size_t)height; \
-  cb_data.args.hipMemcpy2D.kind = (hipMemcpyKind)kind; \
-};
-// hipIpcCloseMemHandle[('void*', 'devPtr')]
-#define INIT_hipIpcCloseMemHandle_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipIpcCloseMemHandle.devPtr = (void*)dev_ptr; \
-};
-// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
-#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipChooseDevice.device = (int*)device; \
-  cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_t*)properties; \
-};
-// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')]
-#define INIT_hipDeviceSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceSetSharedMemConfig.config = (hipSharedMemConfig)config; \
-};
-// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')]
-#define INIT_hipMallocMipmappedArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMallocMipmappedArray.mipmappedArray = (hipMipmappedArray_t*)mipmappedArray; \
-  cb_data.args.hipMallocMipmappedArray.desc = (const hipChannelFormatDesc*)desc; \
-  cb_data.args.hipMallocMipmappedArray.extent = (hipExtent)extent; \
-  cb_data.args.hipMallocMipmappedArray.numLevels = (unsigned int)numLevels; \
-  cb_data.args.hipMallocMipmappedArray.flags = (unsigned int)flags; \
-};
-// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')]
-#define INIT_hipSetupArgument_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipSetupArgument.arg = (const void*)arg; \
-  cb_data.args.hipSetupArgument.size = (size_t)size; \
-  cb_data.args.hipSetupArgument.offset = (size_t)offset; \
-};
-// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')]
-#define INIT_hipIpcGetEventHandle_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipIpcGetEventHandle.handle = (hipIpcEventHandle_t*)handle; \
-  cb_data.args.hipIpcGetEventHandle.event = (hipEvent_t)event; \
-};
-// hipFreeArray[('hipArray*', 'array')]
-#define INIT_hipFreeArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFreeArray.array = (hipArray*)array; \
-};
-// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
-#define INIT_hipCtxSetCacheConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \
-};
-// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')]
-#define INIT_hipFuncSetCacheConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFuncSetCacheConfig.func = (const void*)func; \
-  cb_data.args.hipFuncSetCacheConfig.config = (hipFuncCache_t)cacheConfig; \
-};
-// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipLaunchKernel_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipLaunchKernel.function_address = (const void*)hostFunction; \
-  cb_data.args.hipLaunchKernel.numBlocks = (dim3)gridDim; \
-  cb_data.args.hipLaunchKernel.dimBlocks = (dim3)blockDim; \
-  cb_data.args.hipLaunchKernel.args = (void**)args; \
-  cb_data.args.hipLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
-  cb_data.args.hipLaunchKernel.stream = (hipStream_t)stream; \
-};
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), ('unsigned int', 'flags')]
-#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (hipFunction_t)f; \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
-  cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \
-};
-// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
-#define INIT_hipModuleGetTexRef_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleGetTexRef.texRef = (textureReference**)texRef; \
-  cb_data.args.hipModuleGetTexRef.hmod = (hipModule_t)hmod; \
-  cb_data.args.hipModuleGetTexRef.name = (name) ? strdup(name) : NULL; \
-};
-// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')]
-#define INIT_hipFuncSetAttribute_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFuncSetAttribute.func = (const void*)func; \
-  cb_data.args.hipFuncSetAttribute.attr = (hipFuncAttribute)attr; \
-  cb_data.args.hipFuncSetAttribute.value = (int)value; \
-};
-// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')]
-#define INIT_hipEventElapsedTime_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipEventElapsedTime.ms = (float*)ms; \
-  cb_data.args.hipEventElapsedTime.start = (hipEvent_t)start; \
-  cb_data.args.hipEventElapsedTime.stop = (hipEvent_t)stop; \
-};
-// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
-#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipConfigureCall.gridDim = (dim3)gridDim; \
-  cb_data.args.hipConfigureCall.blockDim = (dim3)blockDim; \
-  cb_data.args.hipConfigureCall.sharedMem = (size_t)sharedMem; \
-  cb_data.args.hipConfigureCall.stream = (hipStream_t)stream; \
-};
-// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')]
-#define INIT_hipMemAdvise_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemAdvise.dev_ptr = (const void*)dev_ptr; \
-  cb_data.args.hipMemAdvise.count = (size_t)count; \
-  cb_data.args.hipMemAdvise.advice = (hipMemoryAdvise)advice; \
-  cb_data.args.hipMemAdvise.device = (int)device; \
-};
-// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy3DAsync.p = (const hipMemcpy3DParms*)p; \
-  cb_data.args.hipMemcpy3DAsync.stream = (hipStream_t)stream; \
-};
-// hipEventDestroy[('hipEvent_t', 'event')]
-#define INIT_hipEventDestroy_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipEventDestroy.event = (hipEvent_t)event; \
-};
-// hipCtxPopCurrent[('hipCtx_t*', 'ctx')]
-#define INIT_hipCtxPopCurrent_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxPopCurrent.ctx = (hipCtx_t*)ctx; \
-};
-// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')]
-#define INIT_hipGetSymbolAddress_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipGetSymbolAddress.devPtr = (void**)devPtr; \
-  cb_data.args.hipGetSymbolAddress.symbol = (const void*)symbol; \
-};
-// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 'hostPtr')]
-#define INIT_hipHostGetFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHostGetFlags.flagsPtr = (unsigned int*)flagsPtr; \
-  cb_data.args.hipHostGetFlags.hostPtr = (void*)hostPtr; \
-};
-// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-#define INIT_hipHostMalloc_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHostMalloc.ptr = (void**)ptr; \
-  cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \
-  cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \
-};
-// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')]
-#define INIT_hipCtxSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxSetSharedMemConfig.config = (hipSharedMemConfig)config; \
-};
-// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')]
-#define INIT_hipFreeMipmappedArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFreeMipmappedArray.mipmappedArray = (hipMipmappedArray_t)mipmappedArray; \
-};
-// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')]
-#define INIT_hipMemGetInfo_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemGetInfo.free = (size_t*)free; \
-  cb_data.args.hipMemGetInfo.total = (size_t*)total; \
-};
-// hipDeviceReset[]
-#define INIT_hipDeviceReset_CB_ARGS_DATA(cb_data) { \
-};
-// hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemset_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemset.dst = (void*)dst; \
-  cb_data.args.hipMemset.value = (int)value; \
-  cb_data.args.hipMemset.sizeBytes = (size_t)sizeBytes; \
-};
-// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')]
-#define INIT_hipMemsetD8_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemsetD8.dest = (hipDeviceptr_t)dst; \
-  cb_data.args.hipMemsetD8.value = (unsigned char)value; \
-  cb_data.args.hipMemsetD8.count = (size_t)count; \
-};
-// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyParam2DAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyParam2DAsync.pCopy = (const hip_Memcpy2D*)pCopy; \
-  cb_data.args.hipMemcpyParam2DAsync.stream = (hipStream_t)stream; \
-};
-// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
-#define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \
-  cb_data.args.hipHostRegister.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipHostRegister.flags = (unsigned int)flags; \
-};
-// hipDriverGetVersion[('int*', 'driverVersion')]
-#define INIT_hipDriverGetVersion_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDriverGetVersion.driverVersion = (int*)driverVersion; \
-};
-// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')]
-#define INIT_hipArray3DCreate_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipArray3DCreate.array = (hipArray**)array; \
-  cb_data.args.hipArray3DCreate.pAllocateArray = (const HIP_ARRAY3D_DESCRIPTOR*)pAllocateArray; \
-};
-// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')]
-#define INIT_hipIpcOpenMemHandle_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipIpcOpenMemHandle.devPtr = (void**)dev_ptr; \
-  cb_data.args.hipIpcOpenMemHandle.handle = (hipIpcMemHandle_t)handle; \
-  cb_data.args.hipIpcOpenMemHandle.flags = (unsigned int)flags; \
-};
-// hipGetLastError[]
-#define INIT_hipGetLastError_CB_ARGS_DATA(cb_data) { \
-};
-// hipGetDeviceFlags[('unsigned int*', 'flags')]
-#define INIT_hipGetDeviceFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipGetDeviceFlags.flags = (unsigned int*)flags; \
-};
-// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
-#define INIT_hipDeviceGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \
-};
-// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')]
-#define INIT_hipDrvMemcpy3D_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDrvMemcpy3D.pCopy = (const HIP_MEMCPY3D*)pCopy; \
-};
-// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy2DFromArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy2DFromArray.dst = (void*)dst; \
-  cb_data.args.hipMemcpy2DFromArray.dpitch = (size_t)dpitch; \
-  cb_data.args.hipMemcpy2DFromArray.src = (hipArray_const_t)src; \
-  cb_data.args.hipMemcpy2DFromArray.wOffset = (size_t)wOffsetSrc; \
-  cb_data.args.hipMemcpy2DFromArray.hOffset = (size_t)hOffset; \
-  cb_data.args.hipMemcpy2DFromArray.width = (size_t)width; \
-  cb_data.args.hipMemcpy2DFromArray.height = (size_t)height; \
-  cb_data.args.hipMemcpy2DFromArray.kind = (hipMemcpyKind)kind; \
-};
-// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')]
-#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (const void*)f; \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize = (size_t)dynamicSMemSize; \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \
-};
-// hipSetDeviceFlags[('unsigned int', 'flags')]
-#define INIT_hipSetDeviceFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipSetDeviceFlags.flags = (unsigned int)flags; \
-};
-// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent')]
-#define INIT_hipHccModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHccModuleLaunchKernel.f = (hipFunction_t)f; \
-  cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \
-  cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \
-  cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \
-  cb_data.args.hipHccModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \
-  cb_data.args.hipHccModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \
-  cb_data.args.hipHccModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \
-  cb_data.args.hipHccModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
-  cb_data.args.hipHccModuleLaunchKernel.hStream = (hipStream_t)hStream; \
-  cb_data.args.hipHccModuleLaunchKernel.kernelParams = (void**)kernelParams; \
-  cb_data.args.hipHccModuleLaunchKernel.extra = (void**)extra; \
-  cb_data.args.hipHccModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \
-  cb_data.args.hipHccModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
-};
-// hipFree[('void*', 'ptr')]
-#define INIT_hipFree_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFree.ptr = (void*)ptr; \
-};
-// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
-#define INIT_hipOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \
-  cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \
-  cb_data.args.hipOccupancyMaxPotentialBlockSize.f = (const void*)f; \
-  cb_data.args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
-  cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \
-};
-// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')]
-#define INIT_hipDeviceGetAttribute_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetAttribute.pi = (int*)pi; \
-  cb_data.args.hipDeviceGetAttribute.attr = (hipDeviceAttribute_t)attr; \
-  cb_data.args.hipDeviceGetAttribute.deviceId = (int)device; \
-};
-// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')]
-#define INIT_hipDeviceComputeCapability_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceComputeCapability.major = (int*)major; \
-  cb_data.args.hipDeviceComputeCapability.minor = (int*)minor; \
-  cb_data.args.hipDeviceComputeCapability.device = (hipDevice_t)device; \
-};
-// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')]
-#define INIT_hipCtxDisablePeerAccess_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxDisablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \
-};
-// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-#define INIT_hipMallocManaged_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMallocManaged.dev_ptr = (void**)dev_ptr; \
-  cb_data.args.hipMallocManaged.size = (size_t)size; \
-  cb_data.args.hipMallocManaged.flags = (unsigned int)flags; \
-};
-// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')]
-#define INIT_hipDeviceGetByPCIBusId_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetByPCIBusId.device = (int*)device; \
-  cb_data.args.hipDeviceGetByPCIBusId.pciBusId = (pciBusIdstr) ? strdup(pciBusIdstr) : NULL; \
-};
-// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')]
-#define INIT_hipIpcGetMemHandle_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipIpcGetMemHandle.handle = (hipIpcMemHandle_t*)handle; \
-  cb_data.args.hipIpcGetMemHandle.devPtr = (void*)dev_ptr; \
-};
-// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyHtoDAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyHtoDAsync.dst = (hipDeviceptr_t)dstDevice; \
-  cb_data.args.hipMemcpyHtoDAsync.src = (void*)srcHost; \
-  cb_data.args.hipMemcpyHtoDAsync.sizeBytes = (size_t)ByteCount; \
-  cb_data.args.hipMemcpyHtoDAsync.stream = (hipStream_t)stream; \
-};
-// hipCtxGetDevice[('hipDevice_t*', 'device')]
-#define INIT_hipCtxGetDevice_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxGetDevice.device = (hipDevice_t*)device; \
-};
-// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyDtoD_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyDtoD.dst = (hipDeviceptr_t)dstDevice; \
-  cb_data.args.hipMemcpyDtoD.src = (hipDeviceptr_t)srcDevice; \
-  cb_data.args.hipMemcpyDtoD.sizeBytes = (size_t)ByteCount; \
-};
-// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')]
-#define INIT_hipModuleLoadData_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleLoadData.module = (hipModule_t*)module; \
-  cb_data.args.hipModuleLoadData.image = (const void*)image; \
-};
-// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')]
-#define INIT_hipDevicePrimaryCtxRelease_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDevicePrimaryCtxRelease.dev = (hipDevice_t)dev; \
-};
-// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')]
-#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f = (const void*)f; \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \
-  cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize = (size_t)dynamicSMemSize; \
-};
-// hipCtxSetCurrent[('hipCtx_t', 'ctx')]
-#define INIT_hipCtxSetCurrent_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxSetCurrent.ctx = (hipCtx_t)ctx; \
-};
-// hipGetErrorString[]
-#define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) { \
-};
-// hipStreamCreate[('hipStream_t*', 'stream')]
-#define INIT_hipStreamCreate_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamCreate.stream = (hipStream_t*)stream; \
-};
-// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')]
-#define INIT_hipDevicePrimaryCtxRetain_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDevicePrimaryCtxRetain.pctx = (hipCtx_t*)pctx; \
-  cb_data.args.hipDevicePrimaryCtxRetain.dev = (hipDevice_t)dev; \
-};
-// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')]
-#define INIT_hipDeviceGet_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGet.device = (hipDevice_t*)device; \
-  cb_data.args.hipDeviceGet.ordinal = (int)deviceId; \
-};
-// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')]
-#define INIT_hipStreamCreateWithFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamCreateWithFlags.stream = (hipStream_t*)stream; \
-  cb_data.args.hipStreamCreateWithFlags.flags = (unsigned int)flags; \
-};
-// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyFromArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyFromArray.dst = (void*)dst; \
-  cb_data.args.hipMemcpyFromArray.srcArray = (hipArray_const_t)src; \
-  cb_data.args.hipMemcpyFromArray.wOffset = (size_t)wOffsetSrc; \
-  cb_data.args.hipMemcpyFromArray.hOffset = (size_t)hOffset; \
-  cb_data.args.hipMemcpyFromArray.count = (size_t)count; \
-  cb_data.args.hipMemcpyFromArray.kind = (hipMemcpyKind)kind; \
-};
-// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpy2DAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy2DAsync.dst = (void*)dst; \
-  cb_data.args.hipMemcpy2DAsync.dpitch = (size_t)dpitch; \
-  cb_data.args.hipMemcpy2DAsync.src = (const void*)src; \
-  cb_data.args.hipMemcpy2DAsync.spitch = (size_t)spitch; \
-  cb_data.args.hipMemcpy2DAsync.width = (size_t)width; \
-  cb_data.args.hipMemcpy2DAsync.height = (size_t)height; \
-  cb_data.args.hipMemcpy2DAsync.kind = (hipMemcpyKind)kind; \
-  cb_data.args.hipMemcpy2DAsync.stream = (hipStream_t)stream; \
-};
-// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')]
-#define INIT_hipFuncGetAttributes_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipFuncGetAttributes.attr = (hipFuncAttributes*)attr; \
-  cb_data.args.hipFuncGetAttributes.func = (const void*)func; \
-};
-// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')]
-#define INIT_hipGetSymbolSize_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipGetSymbolSize.size = (size_t*)sizePtr; \
-  cb_data.args.hipGetSymbolSize.symbol = (const void*)symbol; \
-};
-// hipHostFree[('void*', 'ptr')]
-#define INIT_hipHostFree_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHostFree.ptr = (void*)ptr; \
-};
-// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')]
-#define INIT_hipEventCreateWithFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipEventCreateWithFlags.event = (hipEvent_t*)event; \
-  cb_data.args.hipEventCreateWithFlags.flags = (unsigned int)flags; \
-};
-// hipStreamQuery[('hipStream_t', 'stream')]
-#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \
-};
-// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')]
-#define INIT_hipMemcpy3D_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy3D.p = (const hipMemcpy3DParms*)p; \
-};
-// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyToSymbol_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyToSymbol.symbol = (const void*)symbol; \
-  cb_data.args.hipMemcpyToSymbol.src = (const void*)src; \
-  cb_data.args.hipMemcpyToSymbol.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpyToSymbol.offset = (size_t)offset; \
-  cb_data.args.hipMemcpyToSymbol.kind = (hipMemcpyKind)kind; \
-};
-// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpy_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy.dst = (void*)dst; \
-  cb_data.args.hipMemcpy.src = (const void*)src; \
-  cb_data.args.hipMemcpy.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpy.kind = (hipMemcpyKind)kind; \
-};
-// hipPeekAtLastError[]
-#define INIT_hipPeekAtLastError_CB_ARGS_DATA(cb_data) { \
-};
-// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
-#define INIT_hipExtLaunchMultiKernelMultiDevice_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipExtLaunchMultiKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \
-  cb_data.args.hipExtLaunchMultiKernelMultiDevice.numDevices = (int)numDevices; \
-  cb_data.args.hipExtLaunchMultiKernelMultiDevice.flags = (unsigned int)flags; \
-};
-// hipHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-#define INIT_hipHostAlloc_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHostAlloc.ptr = (void**)ptr; \
-  cb_data.args.hipHostAlloc.size = (size_t)sizeBytes; \
-  cb_data.args.hipHostAlloc.flags = (unsigned int)flags; \
-};
-// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')]
-#define INIT_hipStreamAddCallback_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipStreamAddCallback.stream = (hipStream_t)stream; \
-  cb_data.args.hipStreamAddCallback.callback = (hipStreamCallback_t)callback; \
-  cb_data.args.hipStreamAddCallback.userData = (void*)userData; \
-  cb_data.args.hipStreamAddCallback.flags = (unsigned int)flags; \
-};
-// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
-#define INIT_hipMemcpyToArray_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyToArray.dst = (hipArray*)dst; \
-  cb_data.args.hipMemcpyToArray.wOffset = (size_t)wOffset; \
-  cb_data.args.hipMemcpyToArray.hOffset = (size_t)hOffset; \
-  cb_data.args.hipMemcpyToArray.src = (const void*)src; \
-  cb_data.args.hipMemcpyToArray.count = (size_t)count; \
-  cb_data.args.hipMemcpyToArray.kind = (hipMemcpyKind)kind; \
-};
-// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')]
-#define INIT_hipMemsetD32_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemsetD32.dest = (hipDeviceptr_t)dst; \
-  cb_data.args.hipMemsetD32.value = (int)value; \
-  cb_data.args.hipMemsetD32.count = (size_t)count; \
-};
-// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')]
-#define INIT_hipExtModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipExtModuleLaunchKernel.f = (hipFunction_t)f; \
-  cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \
-  cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \
-  cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \
-  cb_data.args.hipExtModuleLaunchKernel.localWorkSizeX = (unsigned int)localWorkSizeX; \
-  cb_data.args.hipExtModuleLaunchKernel.localWorkSizeY = (unsigned int)localWorkSizeY; \
-  cb_data.args.hipExtModuleLaunchKernel.localWorkSizeZ = (unsigned int)localWorkSizeZ; \
-  cb_data.args.hipExtModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
-  cb_data.args.hipExtModuleLaunchKernel.hStream = (hipStream_t)hStream; \
-  cb_data.args.hipExtModuleLaunchKernel.kernelParams = (void**)kernelParams; \
-  cb_data.args.hipExtModuleLaunchKernel.extra = (void**)extra; \
-  cb_data.args.hipExtModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \
-  cb_data.args.hipExtModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
-  cb_data.args.hipExtModuleLaunchKernel.flags = (unsigned int)flags; \
-};
-// hipDeviceSynchronize[]
-#define INIT_hipDeviceSynchronize_CB_ARGS_DATA(cb_data) { \
-};
-// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
-#define INIT_hipDeviceGetCacheConfig_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \
-};
-// hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')]
-#define INIT_hipMalloc3D_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMalloc3D.pitchedDevPtr = (hipPitchedPtr*)pitchedDevPtr; \
-  cb_data.args.hipMalloc3D.extent = (hipExtent)extent; \
-};
-// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')]
-#define INIT_hipPointerGetAttributes_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipPointerGetAttributes.attributes = (hipPointerAttribute_t*)attributes; \
-  cb_data.args.hipPointerGetAttributes.ptr = (const void*)ptr; \
-};
-// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemsetAsync.dst = (void*)dst; \
-  cb_data.args.hipMemsetAsync.value = (int)value; \
-  cb_data.args.hipMemsetAsync.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemsetAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')]
-#define INIT_hipDeviceGetName_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetName.name = (char*)name; \
-  cb_data.args.hipDeviceGetName.len = (int)len; \
-  cb_data.args.hipDeviceGetName.device = (hipDevice_t)device; \
-};
-// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')]
-#define INIT_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize = (int*)gridSize; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize = (int*)blockSize; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f = (hipFunction_t)f; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit = (int)blockSizeLimit; \
-  cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags = (unsigned int)flags; \
-};
-// hipCtxPushCurrent[('hipCtx_t', 'ctx')]
-#define INIT_hipCtxPushCurrent_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxPushCurrent.ctx = (hipCtx_t)ctx; \
-};
-// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyPeer_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyPeer.dst = (void*)dst; \
-  cb_data.args.hipMemcpyPeer.dstDeviceId = (int)dstDevice; \
-  cb_data.args.hipMemcpyPeer.src = (const void*)src; \
-  cb_data.args.hipMemcpyPeer.srcDeviceId = (int)srcDevice; \
-  cb_data.args.hipMemcpyPeer.sizeBytes = (size_t)sizeBytes; \
-};
-// hipEventSynchronize[('hipEvent_t', 'event')]
-#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipEventSynchronize.event = (hipEvent_t)event; \
-};
-// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyDtoDAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyDtoDAsync.dst = (hipDeviceptr_t)dstDevice; \
-  cb_data.args.hipMemcpyDtoDAsync.src = (hipDeviceptr_t)srcDevice; \
-  cb_data.args.hipMemcpyDtoDAsync.sizeBytes = (size_t)ByteCount; \
-  cb_data.args.hipMemcpyDtoDAsync.stream = (hipStream_t)stream; \
-};
-// hipProfilerStart[]
-#define INIT_hipProfilerStart_CB_ARGS_DATA(cb_data) { \
-};
-// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
-#define INIT_hipExtMallocWithFlags_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipExtMallocWithFlags.ptr = (void**)ptr; \
-  cb_data.args.hipExtMallocWithFlags.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipExtMallocWithFlags.flags = (unsigned int)flags; \
-};
-// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')]
-#define INIT_hipCtxEnablePeerAccess_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipCtxEnablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \
-  cb_data.args.hipCtxEnablePeerAccess.flags = (unsigned int)flags; \
-};
-// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')]
-#define INIT_hipMemAllocHost_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemAllocHost.ptr = (void**)ptr; \
-  cb_data.args.hipMemAllocHost.size = (size_t)size; \
-};
-// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyDtoHAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyDtoHAsync.dst = (void*)dstHost; \
-  cb_data.args.hipMemcpyDtoHAsync.src = (hipDeviceptr_t)srcDevice; \
-  cb_data.args.hipMemcpyDtoHAsync.sizeBytes = (size_t)ByteCount; \
-  cb_data.args.hipMemcpyDtoHAsync.stream = (hipStream_t)stream; \
-};
-// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')]
-#define INIT_hipModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \
-  cb_data.args.hipModuleLaunchKernel.gridDimX = (unsigned int)gridDimX; \
-  cb_data.args.hipModuleLaunchKernel.gridDimY = (unsigned int)gridDimY; \
-  cb_data.args.hipModuleLaunchKernel.gridDimZ = (unsigned int)gridDimZ; \
-  cb_data.args.hipModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \
-  cb_data.args.hipModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \
-  cb_data.args.hipModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \
-  cb_data.args.hipModuleLaunchKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \
-  cb_data.args.hipModuleLaunchKernel.stream = (hipStream_t)hStream; \
-  cb_data.args.hipModuleLaunchKernel.kernelParams = (void**)kernelParams; \
-  cb_data.args.hipModuleLaunchKernel.extra = (void**)extra; \
-};
-// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')]
-#define INIT_hipMemAllocPitch_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemAllocPitch.dptr = (hipDeviceptr_t*)dptr; \
-  cb_data.args.hipMemAllocPitch.pitch = (size_t*)pitch; \
-  cb_data.args.hipMemAllocPitch.widthInBytes = (size_t)widthInBytes; \
-  cb_data.args.hipMemAllocPitch.height = (size_t)height; \
-  cb_data.args.hipMemAllocPitch.elementSizeBytes = (unsigned int)elementSizeBytes; \
-};
-// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')]
-#define INIT_hipExtLaunchKernel_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipExtLaunchKernel.function_address = (const void*)hostFunction; \
-  cb_data.args.hipExtLaunchKernel.numBlocks = (dim3)gridDim; \
-  cb_data.args.hipExtLaunchKernel.dimBlocks = (dim3)blockDim; \
-  cb_data.args.hipExtLaunchKernel.args = (void**)args; \
-  cb_data.args.hipExtLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \
-  cb_data.args.hipExtLaunchKernel.stream = (hipStream_t)stream; \
-  cb_data.args.hipExtLaunchKernel.startEvent = (hipEvent_t)startEvent; \
-  cb_data.args.hipExtLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
-  cb_data.args.hipExtLaunchKernel.flags = (int)flags; \
-};
-// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpy2DFromArrayAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpy2DFromArrayAsync.dst = (void*)dst; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.dpitch = (size_t)dpitch; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.src = (hipArray_const_t)src; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.wOffset = (size_t)wOffsetSrc; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.hOffset = (size_t)hOffsetSrc; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.width = (size_t)width; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.height = (size_t)height; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.kind = (hipMemcpyKind)kind; \
-  cb_data.args.hipMemcpy2DFromArrayAsync.stream = (hipStream_t)stream; \
-};
-// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')]
-#define INIT_hipDeviceGetLimit_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetLimit.pValue = (size_t*)pValue; \
-  cb_data.args.hipDeviceGetLimit.limit = (hipLimit_t)limit; \
-};
-// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')]
-#define INIT_hipModuleLoadDataEx_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipModuleLoadDataEx.module = (hipModule_t*)module; \
-  cb_data.args.hipModuleLoadDataEx.image = (const void*)image; \
-  cb_data.args.hipModuleLoadDataEx.numOptions = (unsigned int)numOptions; \
-  cb_data.args.hipModuleLoadDataEx.options = (hipJitOption*)options; \
-  cb_data.args.hipModuleLoadDataEx.optionsValues = (void**)optionsValues; \
-};
-// hipRuntimeGetVersion[('int*', 'runtimeVersion')]
-#define INIT_hipRuntimeGetVersion_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipRuntimeGetVersion.runtimeVersion = (int*)runtimeVersion; \
-};
-// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
-#define INIT_hipMemRangeGetAttribute_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemRangeGetAttribute.data = (void*)data; \
-  cb_data.args.hipMemRangeGetAttribute.data_size = (size_t)data_size; \
-  cb_data.args.hipMemRangeGetAttribute.attribute = (hipMemRangeAttribute)attribute; \
-  cb_data.args.hipMemRangeGetAttribute.dev_ptr = (const void*)dev_ptr; \
-  cb_data.args.hipMemRangeGetAttribute.count = (size_t)count; \
-};
-// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')]
-#define INIT_hipDeviceGetP2PAttribute_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceGetP2PAttribute.value = (int*)value; \
-  cb_data.args.hipDeviceGetP2PAttribute.attr = (hipDeviceP2PAttr)attr; \
-  cb_data.args.hipDeviceGetP2PAttribute.srcDevice = (int)srcDevice; \
-  cb_data.args.hipDeviceGetP2PAttribute.dstDevice = (int)dstDevice; \
-};
-// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyPeerAsync_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyPeerAsync.dst = (void*)dst; \
-  cb_data.args.hipMemcpyPeerAsync.dstDeviceId = (int)dstDevice; \
-  cb_data.args.hipMemcpyPeerAsync.src = (const void*)src; \
-  cb_data.args.hipMemcpyPeerAsync.srcDevice = (int)srcDevice; \
-  cb_data.args.hipMemcpyPeerAsync.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpyPeerAsync.stream = (hipStream_t)stream; \
-};
-// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
-#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipGetDeviceProperties.props = (hipDeviceProp_t*)props; \
-  cb_data.args.hipGetDeviceProperties.device = (hipDevice_t)device; \
-};
-// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
-#define INIT_hipMemcpyDtoH_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyDtoH.dst = (void*)dstHost; \
-  cb_data.args.hipMemcpyDtoH.src = (hipDeviceptr_t)srcDevice; \
-  cb_data.args.hipMemcpyDtoH.sizeBytes = (size_t)ByteCount; \
-};
-// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-#define INIT_hipMemcpyWithStream_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyWithStream.dst = (void*)dst; \
-  cb_data.args.hipMemcpyWithStream.src = (const void*)src; \
-  cb_data.args.hipMemcpyWithStream.sizeBytes = (size_t)sizeBytes; \
-  cb_data.args.hipMemcpyWithStream.kind = (hipMemcpyKind)kind; \
-  cb_data.args.hipMemcpyWithStream.stream = (hipStream_t)stream; \
-};
-// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')]
-#define INIT_hipDeviceTotalMem_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDeviceTotalMem.bytes = (size_t*)bytes; \
-  cb_data.args.hipDeviceTotalMem.device = (hipDevice_t)device; \
-};
-// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')]
-#define INIT_hipHostGetDevicePointer_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipHostGetDevicePointer.devPtr = (void**)devicePointer; \
-  cb_data.args.hipHostGetDevicePointer.hstPtr = (void*)hostPointer; \
-  cb_data.args.hipHostGetDevicePointer.flags = (unsigned int)flags; \
-};
-// hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
-#define INIT_hipMemRangeGetAttributes_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemRangeGetAttributes.data = (void**)data; \
-  cb_data.args.hipMemRangeGetAttributes.data_sizes = (size_t*)data_sizes; \
-  cb_data.args.hipMemRangeGetAttributes.attributes = (hipMemRangeAttribute*)attributes; \
-  cb_data.args.hipMemRangeGetAttributes.num_attributes = (size_t)num_attributes; \
-  cb_data.args.hipMemRangeGetAttributes.dev_ptr = (const void*)dev_ptr; \
-  cb_data.args.hipMemRangeGetAttributes.count = (size_t)count; \
-};
-// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')]
-#define INIT_hipMemcpyParam2D_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemcpyParam2D.pCopy = (const hip_Memcpy2D*)pCopy; \
-};
-// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')]
-#define INIT_hipDevicePrimaryCtxReset_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipDevicePrimaryCtxReset.dev = (hipDevice_t)dev; \
-};
-// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')]
-#define INIT_hipGetMipmappedArrayLevel_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipGetMipmappedArrayLevel.levelArray = (hipArray_t*)levelArray; \
-  cb_data.args.hipGetMipmappedArrayLevel.mipmappedArray = (hipMipmappedArray_const_t)mipmappedArray; \
-  cb_data.args.hipGetMipmappedArrayLevel.level = (unsigned int)level; \
-};
-// hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-#define INIT_hipMemsetD32Async_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipMemsetD32Async.dst = (hipDeviceptr_t)dst; \
-  cb_data.args.hipMemsetD32Async.value = (int)value; \
-  cb_data.args.hipMemsetD32Async.count = (size_t)count; \
-  cb_data.args.hipMemsetD32Async.stream = (hipStream_t)stream; \
-};
-// hipGetDevice[('int*', 'deviceId')]
-#define INIT_hipGetDevice_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipGetDevice.deviceId = (int*)deviceId; \
-};
-// hipGetDeviceCount[('int*', 'count')]
-#define INIT_hipGetDeviceCount_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipGetDeviceCount.count = (int*)count; \
-};
-// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')]
-#define INIT_hipIpcOpenEventHandle_CB_ARGS_DATA(cb_data) { \
-  cb_data.args.hipIpcOpenEventHandle.event = (hipEvent_t*)event; \
-  cb_data.args.hipIpcOpenEventHandle.handle = (hipIpcEventHandle_t)handle; \
-};
-#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)
-#if HIP_PROF_HIP_API_STRING
-
-// HIP API args filling method
-static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
-  switch (id) {
-// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipDrvMemcpy3DAsync:
-      if (data->args.hipDrvMemcpy3DAsync.pCopy) data->args.hipDrvMemcpy3DAsync.pCopy__val = *(data->args.hipDrvMemcpy3DAsync.pCopy);
-      break;
-// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipDeviceEnablePeerAccess:
-      break;
-// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')]
-    case HIP_API_ID_hipFuncSetSharedMemConfig:
-      break;
-// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyToSymbolAsync:
-      break;
-// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')]
-    case HIP_API_ID_hipMallocPitch:
-      if (data->args.hipMallocPitch.ptr) data->args.hipMallocPitch.ptr__val = *(data->args.hipMallocPitch.ptr);
-      if (data->args.hipMallocPitch.pitch) data->args.hipMallocPitch.pitch__val = *(data->args.hipMallocPitch.pitch);
-      break;
-// hipMalloc[('void**', 'ptr'), ('size_t', 'size')]
-    case HIP_API_ID_hipMalloc:
-      if (data->args.hipMalloc.ptr) data->args.hipMalloc.ptr__val = *(data->args.hipMalloc.ptr);
-      break;
-// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')]
-    case HIP_API_ID_hipMemsetD16:
-      break;
-// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')]
-    case HIP_API_ID_hipExtStreamGetCUMask:
-      if (data->args.hipExtStreamGetCUMask.cuMask) data->args.hipExtStreamGetCUMask.cuMask__val = *(data->args.hipExtStreamGetCUMask.cuMask);
-      break;
-// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipEventRecord:
-      break;
-// hipCtxSynchronize[]
-    case HIP_API_ID_hipCtxSynchronize:
-      break;
-// hipSetDevice[('int', 'deviceId')]
-    case HIP_API_ID_hipSetDevice:
-      break;
-// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('int*', 'apiVersion')]
-    case HIP_API_ID_hipCtxGetApiVersion:
-      if (data->args.hipCtxGetApiVersion.apiVersion) data->args.hipCtxGetApiVersion.apiVersion__val = *(data->args.hipCtxGetApiVersion.apiVersion);
-      break;
-// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyFromSymbolAsync:
-      break;
-// hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')]
-    case HIP_API_ID_hipExtGetLinkTypeAndHopCount:
-      if (data->args.hipExtGetLinkTypeAndHopCount.linktype) data->args.hipExtGetLinkTypeAndHopCount.linktype__val = *(data->args.hipExtGetLinkTypeAndHopCount.linktype);
-      if (data->args.hipExtGetLinkTypeAndHopCount.hopcount) data->args.hipExtGetLinkTypeAndHopCount.hopcount__val = *(data->args.hipExtGetLinkTypeAndHopCount.hopcount);
-      break;
-// __hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')]
-    case HIP_API_ID___hipPopCallConfiguration:
-      if (data->args.__hipPopCallConfiguration.gridDim) data->args.__hipPopCallConfiguration.gridDim__val = *(data->args.__hipPopCallConfiguration.gridDim);
-      if (data->args.__hipPopCallConfiguration.blockDim) data->args.__hipPopCallConfiguration.blockDim__val = *(data->args.__hipPopCallConfiguration.blockDim);
-      if (data->args.__hipPopCallConfiguration.sharedMem) data->args.__hipPopCallConfiguration.sharedMem__val = *(data->args.__hipPopCallConfiguration.sharedMem);
-      if (data->args.__hipPopCallConfiguration.stream) data->args.__hipPopCallConfiguration.stream__val = *(data->args.__hipPopCallConfiguration.stream);
-      break;
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')]
-    case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor:
-      if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks);
-      break;
-// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')]
-    case HIP_API_ID_hipMemset3D:
-      break;
-// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')]
-    case HIP_API_ID_hipStreamCreateWithPriority:
-      if (data->args.hipStreamCreateWithPriority.stream) data->args.hipStreamCreateWithPriority.stream__val = *(data->args.hipStreamCreateWithPriority.stream);
-      break;
-// hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpy2DToArray:
-      if (data->args.hipMemcpy2DToArray.dst) data->args.hipMemcpy2DToArray.dst__val = *(data->args.hipMemcpy2DToArray.dst);
-      break;
-// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemsetD8Async:
-      break;
-// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
-    case HIP_API_ID_hipCtxGetCacheConfig:
-      if (data->args.hipCtxGetCacheConfig.cacheConfig) data->args.hipCtxGetCacheConfig.cacheConfig__val = *(data->args.hipCtxGetCacheConfig.cacheConfig);
-      break;
-// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')]
-    case HIP_API_ID_hipModuleGetFunction:
-      if (data->args.hipModuleGetFunction.function) data->args.hipModuleGetFunction.function__val = *(data->args.hipModuleGetFunction.function);
-      if (data->args.hipModuleGetFunction.kname) data->args.hipModuleGetFunction.kname__val = *(data->args.hipModuleGetFunction.kname);
-      break;
-// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipStreamWaitEvent:
-      break;
-// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')]
-    case HIP_API_ID_hipDeviceGetStreamPriorityRange:
-      if (data->args.hipDeviceGetStreamPriorityRange.leastPriority) data->args.hipDeviceGetStreamPriorityRange.leastPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.leastPriority);
-      if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority) data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.greatestPriority);
-      break;
-// hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')]
-    case HIP_API_ID_hipModuleLoad:
-      if (data->args.hipModuleLoad.module) data->args.hipModuleLoad.module__val = *(data->args.hipModuleLoad.module);
-      if (data->args.hipModuleLoad.fname) data->args.hipModuleLoad.fname__val = *(data->args.hipModuleLoad.fname);
-      break;
-// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipDevicePrimaryCtxSetFlags:
-      break;
-// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipLaunchCooperativeKernel:
-      if (data->args.hipLaunchCooperativeKernel.kernelParams) data->args.hipLaunchCooperativeKernel.kernelParams__val = *(data->args.hipLaunchCooperativeKernel.kernelParams);
-      break;
-// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice:
-      if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList) data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val = *(data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList);
-      break;
-// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyAsync:
-      break;
-// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipMalloc3DArray:
-      if (data->args.hipMalloc3DArray.array) data->args.hipMalloc3DArray.array__val = *(data->args.hipMalloc3DArray.array);
-      if (data->args.hipMalloc3DArray.desc) data->args.hipMalloc3DArray.desc__val = *(data->args.hipMalloc3DArray.desc);
-      break;
-// hipMallocHost[('void**', 'ptr'), ('size_t', 'size')]
-    case HIP_API_ID_hipMallocHost:
-      if (data->args.hipMallocHost.ptr) data->args.hipMallocHost.ptr__val = *(data->args.hipMallocHost.ptr);
-      break;
-// hipCtxGetCurrent[('hipCtx_t*', 'ctx')]
-    case HIP_API_ID_hipCtxGetCurrent:
-      if (data->args.hipCtxGetCurrent.ctx) data->args.hipCtxGetCurrent.ctx__val = *(data->args.hipCtxGetCurrent.ctx);
-      break;
-// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')]
-    case HIP_API_ID_hipDevicePrimaryCtxGetState:
-      if (data->args.hipDevicePrimaryCtxGetState.flags) data->args.hipDevicePrimaryCtxGetState.flags__val = *(data->args.hipDevicePrimaryCtxGetState.flags);
-      if (data->args.hipDevicePrimaryCtxGetState.active) data->args.hipDevicePrimaryCtxGetState.active__val = *(data->args.hipDevicePrimaryCtxGetState.active);
-      break;
-// hipEventQuery[('hipEvent_t', 'event')]
-    case HIP_API_ID_hipEventQuery:
-      break;
-// hipEventCreate[('hipEvent_t*', 'event')]
-    case HIP_API_ID_hipEventCreate:
-      if (data->args.hipEventCreate.event) data->args.hipEventCreate.event__val = *(data->args.hipEventCreate.event);
-      break;
-// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')]
-    case HIP_API_ID_hipMemGetAddressRange:
-      if (data->args.hipMemGetAddressRange.pbase) data->args.hipMemGetAddressRange.pbase__val = *(data->args.hipMemGetAddressRange.pbase);
-      if (data->args.hipMemGetAddressRange.psize) data->args.hipMemGetAddressRange.psize__val = *(data->args.hipMemGetAddressRange.psize);
-      break;
-// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpyFromSymbol:
-      break;
-// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')]
-    case HIP_API_ID_hipArrayCreate:
-      if (data->args.hipArrayCreate.pHandle) data->args.hipArrayCreate.pHandle__val = *(data->args.hipArrayCreate.pHandle);
-      if (data->args.hipArrayCreate.pAllocateArray) data->args.hipArrayCreate.pAllocateArray__val = *(data->args.hipArrayCreate.pAllocateArray);
-      break;
-// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('hipDeviceptr_t*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipStreamAttachMemAsync:
-      if (data->args.hipStreamAttachMemAsync.dev_ptr) data->args.hipStreamAttachMemAsync.dev_ptr__val = *(data->args.hipStreamAttachMemAsync.dev_ptr);
-      break;
-// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')]
-    case HIP_API_ID_hipStreamGetFlags:
-      if (data->args.hipStreamGetFlags.flags) data->args.hipStreamGetFlags.flags__val = *(data->args.hipStreamGetFlags.flags);
-      break;
-// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipMallocArray:
-      if (data->args.hipMallocArray.array) data->args.hipMallocArray.array__val = *(data->args.hipMallocArray.array);
-      if (data->args.hipMallocArray.desc) data->args.hipMallocArray.desc__val = *(data->args.hipMallocArray.desc);
-      break;
-// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
-    case HIP_API_ID_hipCtxGetSharedMemConfig:
-      if (data->args.hipCtxGetSharedMemConfig.pConfig) data->args.hipCtxGetSharedMemConfig.pConfig__val = *(data->args.hipCtxGetSharedMemConfig.pConfig);
-      break;
-// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')]
-    case HIP_API_ID_hipDeviceDisablePeerAccess:
-      break;
-// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
-    case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize:
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize);
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize);
-      break;
-// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')]
-    case HIP_API_ID_hipMemPtrGetInfo:
-      if (data->args.hipMemPtrGetInfo.size) data->args.hipMemPtrGetInfo.size__val = *(data->args.hipMemPtrGetInfo.size);
-      break;
-// hipFuncGetAttribute[('int*', 'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')]
-    case HIP_API_ID_hipFuncGetAttribute:
-      if (data->args.hipFuncGetAttribute.value) data->args.hipFuncGetAttribute.value__val = *(data->args.hipFuncGetAttribute.value);
-      break;
-// hipCtxGetFlags[('unsigned int*', 'flags')]
-    case HIP_API_ID_hipCtxGetFlags:
-      if (data->args.hipCtxGetFlags.flags) data->args.hipCtxGetFlags.flags__val = *(data->args.hipCtxGetFlags.flags);
-      break;
-// hipStreamDestroy[('hipStream_t', 'stream')]
-    case HIP_API_ID_hipStreamDestroy:
-      break;
-// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
-    case HIP_API_ID___hipPushCallConfiguration:
-      break;
-// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemset3DAsync:
-      break;
-// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')]
-    case HIP_API_ID_hipDeviceGetPCIBusId:
-      data->args.hipDeviceGetPCIBusId.pciBusId = (data->args.hipDeviceGetPCIBusId.pciBusId) ? strdup(data->args.hipDeviceGetPCIBusId.pciBusId) : NULL;
-      break;
-// hipInit[('unsigned int', 'flags')]
-    case HIP_API_ID_hipInit:
-      break;
-// hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')]
-    case HIP_API_ID_hipMemcpyAtoH:
-      if (data->args.hipMemcpyAtoH.srcArray) data->args.hipMemcpyAtoH.srcArray__val = *(data->args.hipMemcpyAtoH.srcArray);
-      break;
-// hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')]
-    case HIP_API_ID_hipStreamGetPriority:
-      if (data->args.hipStreamGetPriority.priority) data->args.hipStreamGetPriority.priority__val = *(data->args.hipStreamGetPriority.priority);
-      break;
-// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')]
-    case HIP_API_ID_hipMemset2D:
-      break;
-// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemset2DAsync:
-      break;
-// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')]
-    case HIP_API_ID_hipDeviceCanAccessPeer:
-      if (data->args.hipDeviceCanAccessPeer.canAccessPeer) data->args.hipDeviceCanAccessPeer.canAccessPeer__val = *(data->args.hipDeviceCanAccessPeer.canAccessPeer);
-      break;
-// hipLaunchByPtr[('const void*', 'hostFunction')]
-    case HIP_API_ID_hipLaunchByPtr:
-      break;
-// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemPrefetchAsync:
-      break;
-// hipCtxDestroy[('hipCtx_t', 'ctx')]
-    case HIP_API_ID_hipCtxDestroy:
-      break;
-// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemsetD16Async:
-      break;
-// hipModuleUnload[('hipModule_t', 'module')]
-    case HIP_API_ID_hipModuleUnload:
-      break;
-// hipHostUnregister[('void*', 'hostPtr')]
-    case HIP_API_ID_hipHostUnregister:
-      break;
-// hipProfilerStop[]
-    case HIP_API_ID_hipProfilerStop:
-      break;
-// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
-    case HIP_API_ID_hipExtStreamCreateWithCUMask:
-      if (data->args.hipExtStreamCreateWithCUMask.stream) data->args.hipExtStreamCreateWithCUMask.stream__val = *(data->args.hipExtStreamCreateWithCUMask.stream);
-      if (data->args.hipExtStreamCreateWithCUMask.cuMask) data->args.hipExtStreamCreateWithCUMask.cuMask__val = *(data->args.hipExtStreamCreateWithCUMask.cuMask);
-      break;
-// hipStreamSynchronize[('hipStream_t', 'stream')]
-    case HIP_API_ID_hipStreamSynchronize:
-      break;
-// hipFreeHost[('void*', 'ptr')]
-    case HIP_API_ID_hipFreeHost:
-      break;
-// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
-    case HIP_API_ID_hipDeviceSetCacheConfig:
-      break;
-// hipGetErrorName[]
-    case HIP_API_ID_hipGetErrorName:
-      break;
-// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes')]
-    case HIP_API_ID_hipMemcpyHtoD:
-      break;
-// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
-    case HIP_API_ID_hipModuleGetGlobal:
-      if (data->args.hipModuleGetGlobal.dptr) data->args.hipModuleGetGlobal.dptr__val = *(data->args.hipModuleGetGlobal.dptr);
-      if (data->args.hipModuleGetGlobal.bytes) data->args.hipModuleGetGlobal.bytes__val = *(data->args.hipModuleGetGlobal.bytes);
-      if (data->args.hipModuleGetGlobal.name) data->args.hipModuleGetGlobal.name__val = *(data->args.hipModuleGetGlobal.name);
-      break;
-// hipMemcpyHtoA[('hipArray*', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')]
-    case HIP_API_ID_hipMemcpyHtoA:
-      if (data->args.hipMemcpyHtoA.dstArray) data->args.hipMemcpyHtoA.dstArray__val = *(data->args.hipMemcpyHtoA.dstArray);
-      break;
-// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')]
-    case HIP_API_ID_hipCtxCreate:
-      if (data->args.hipCtxCreate.ctx) data->args.hipCtxCreate.ctx__val = *(data->args.hipCtxCreate.ctx);
-      break;
-// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpy2D:
-      break;
-// hipIpcCloseMemHandle[('void*', 'devPtr')]
-    case HIP_API_ID_hipIpcCloseMemHandle:
-      break;
-// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
-    case HIP_API_ID_hipChooseDevice:
-      if (data->args.hipChooseDevice.device) data->args.hipChooseDevice.device__val = *(data->args.hipChooseDevice.device);
-      if (data->args.hipChooseDevice.prop) data->args.hipChooseDevice.prop__val = *(data->args.hipChooseDevice.prop);
-      break;
-// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')]
-    case HIP_API_ID_hipDeviceSetSharedMemConfig:
-      break;
-// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipMallocMipmappedArray:
-      if (data->args.hipMallocMipmappedArray.mipmappedArray) data->args.hipMallocMipmappedArray.mipmappedArray__val = *(data->args.hipMallocMipmappedArray.mipmappedArray);
-      if (data->args.hipMallocMipmappedArray.desc) data->args.hipMallocMipmappedArray.desc__val = *(data->args.hipMallocMipmappedArray.desc);
-      break;
-// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')]
-    case HIP_API_ID_hipSetupArgument:
-      break;
-// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')]
-    case HIP_API_ID_hipIpcGetEventHandle:
-      if (data->args.hipIpcGetEventHandle.handle) data->args.hipIpcGetEventHandle.handle__val = *(data->args.hipIpcGetEventHandle.handle);
-      break;
-// hipFreeArray[('hipArray*', 'array')]
-    case HIP_API_ID_hipFreeArray:
-      if (data->args.hipFreeArray.array) data->args.hipFreeArray.array__val = *(data->args.hipFreeArray.array);
-      break;
-// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')]
-    case HIP_API_ID_hipCtxSetCacheConfig:
-      break;
-// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')]
-    case HIP_API_ID_hipFuncSetCacheConfig:
-      break;
-// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipLaunchKernel:
-      if (data->args.hipLaunchKernel.args) data->args.hipLaunchKernel.args__val = *(data->args.hipLaunchKernel.args);
-      break;
-// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
-      if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks);
-      break;
-// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')]
-    case HIP_API_ID_hipModuleGetTexRef:
-      if (data->args.hipModuleGetTexRef.texRef) data->args.hipModuleGetTexRef.texRef__val = *(data->args.hipModuleGetTexRef.texRef);
-      if (data->args.hipModuleGetTexRef.name) data->args.hipModuleGetTexRef.name__val = *(data->args.hipModuleGetTexRef.name);
-      break;
-// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')]
-    case HIP_API_ID_hipFuncSetAttribute:
-      break;
-// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')]
-    case HIP_API_ID_hipEventElapsedTime:
-      if (data->args.hipEventElapsedTime.ms) data->args.hipEventElapsedTime.ms__val = *(data->args.hipEventElapsedTime.ms);
-      break;
-// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipConfigureCall:
-      break;
-// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')]
-    case HIP_API_ID_hipMemAdvise:
-      break;
-// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpy3DAsync:
-      if (data->args.hipMemcpy3DAsync.p) data->args.hipMemcpy3DAsync.p__val = *(data->args.hipMemcpy3DAsync.p);
-      break;
-// hipEventDestroy[('hipEvent_t', 'event')]
-    case HIP_API_ID_hipEventDestroy:
-      break;
-// hipCtxPopCurrent[('hipCtx_t*', 'ctx')]
-    case HIP_API_ID_hipCtxPopCurrent:
-      if (data->args.hipCtxPopCurrent.ctx) data->args.hipCtxPopCurrent.ctx__val = *(data->args.hipCtxPopCurrent.ctx);
-      break;
-// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')]
-    case HIP_API_ID_hipGetSymbolAddress:
-      if (data->args.hipGetSymbolAddress.devPtr) data->args.hipGetSymbolAddress.devPtr__val = *(data->args.hipGetSymbolAddress.devPtr);
-      break;
-// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 'hostPtr')]
-    case HIP_API_ID_hipHostGetFlags:
-      if (data->args.hipHostGetFlags.flagsPtr) data->args.hipHostGetFlags.flagsPtr__val = *(data->args.hipHostGetFlags.flagsPtr);
-      break;
-// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipHostMalloc:
-      if (data->args.hipHostMalloc.ptr) data->args.hipHostMalloc.ptr__val = *(data->args.hipHostMalloc.ptr);
-      break;
-// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')]
-    case HIP_API_ID_hipCtxSetSharedMemConfig:
-      break;
-// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')]
-    case HIP_API_ID_hipFreeMipmappedArray:
-      break;
-// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')]
-    case HIP_API_ID_hipMemGetInfo:
-      if (data->args.hipMemGetInfo.free) data->args.hipMemGetInfo.free__val = *(data->args.hipMemGetInfo.free);
-      if (data->args.hipMemGetInfo.total) data->args.hipMemGetInfo.total__val = *(data->args.hipMemGetInfo.total);
-      break;
-// hipDeviceReset[]
-    case HIP_API_ID_hipDeviceReset:
-      break;
-// hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')]
-    case HIP_API_ID_hipMemset:
-      break;
-// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')]
-    case HIP_API_ID_hipMemsetD8:
-      break;
-// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyParam2DAsync:
-      if (data->args.hipMemcpyParam2DAsync.pCopy) data->args.hipMemcpyParam2DAsync.pCopy__val = *(data->args.hipMemcpyParam2DAsync.pCopy);
-      break;
-// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipHostRegister:
-      break;
-// hipDriverGetVersion[('int*', 'driverVersion')]
-    case HIP_API_ID_hipDriverGetVersion:
-      if (data->args.hipDriverGetVersion.driverVersion) data->args.hipDriverGetVersion.driverVersion__val = *(data->args.hipDriverGetVersion.driverVersion);
-      break;
-// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')]
-    case HIP_API_ID_hipArray3DCreate:
-      if (data->args.hipArray3DCreate.array) data->args.hipArray3DCreate.array__val = *(data->args.hipArray3DCreate.array);
-      if (data->args.hipArray3DCreate.pAllocateArray) data->args.hipArray3DCreate.pAllocateArray__val = *(data->args.hipArray3DCreate.pAllocateArray);
-      break;
-// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipIpcOpenMemHandle:
-      if (data->args.hipIpcOpenMemHandle.devPtr) data->args.hipIpcOpenMemHandle.devPtr__val = *(data->args.hipIpcOpenMemHandle.devPtr);
-      break;
-// hipGetLastError[]
-    case HIP_API_ID_hipGetLastError:
-      break;
-// hipGetDeviceFlags[('unsigned int*', 'flags')]
-    case HIP_API_ID_hipGetDeviceFlags:
-      if (data->args.hipGetDeviceFlags.flags) data->args.hipGetDeviceFlags.flags__val = *(data->args.hipGetDeviceFlags.flags);
-      break;
-// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')]
-    case HIP_API_ID_hipDeviceGetSharedMemConfig:
-      if (data->args.hipDeviceGetSharedMemConfig.pConfig) data->args.hipDeviceGetSharedMemConfig.pConfig__val = *(data->args.hipDeviceGetSharedMemConfig.pConfig);
-      break;
-// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')]
-    case HIP_API_ID_hipDrvMemcpy3D:
-      if (data->args.hipDrvMemcpy3D.pCopy) data->args.hipDrvMemcpy3D.pCopy__val = *(data->args.hipDrvMemcpy3D.pCopy);
-      break;
-// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpy2DFromArray:
-      break;
-// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
-      if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks);
-      break;
-// hipSetDeviceFlags[('unsigned int', 'flags')]
-    case HIP_API_ID_hipSetDeviceFlags:
-      break;
-// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent')]
-    case HIP_API_ID_hipHccModuleLaunchKernel:
-      if (data->args.hipHccModuleLaunchKernel.kernelParams) data->args.hipHccModuleLaunchKernel.kernelParams__val = *(data->args.hipHccModuleLaunchKernel.kernelParams);
-      if (data->args.hipHccModuleLaunchKernel.extra) data->args.hipHccModuleLaunchKernel.extra__val = *(data->args.hipHccModuleLaunchKernel.extra);
-      break;
-// hipFree[('void*', 'ptr')]
-    case HIP_API_ID_hipFree:
-      break;
-// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')]
-    case HIP_API_ID_hipOccupancyMaxPotentialBlockSize:
-      if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize) data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.gridSize);
-      if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize) data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.blockSize);
-      break;
-// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')]
-    case HIP_API_ID_hipDeviceGetAttribute:
-      if (data->args.hipDeviceGetAttribute.pi) data->args.hipDeviceGetAttribute.pi__val = *(data->args.hipDeviceGetAttribute.pi);
-      break;
-// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')]
-    case HIP_API_ID_hipDeviceComputeCapability:
-      if (data->args.hipDeviceComputeCapability.major) data->args.hipDeviceComputeCapability.major__val = *(data->args.hipDeviceComputeCapability.major);
-      if (data->args.hipDeviceComputeCapability.minor) data->args.hipDeviceComputeCapability.minor__val = *(data->args.hipDeviceComputeCapability.minor);
-      break;
-// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')]
-    case HIP_API_ID_hipCtxDisablePeerAccess:
-      break;
-// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipMallocManaged:
-      if (data->args.hipMallocManaged.dev_ptr) data->args.hipMallocManaged.dev_ptr__val = *(data->args.hipMallocManaged.dev_ptr);
-      break;
-// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')]
-    case HIP_API_ID_hipDeviceGetByPCIBusId:
-      if (data->args.hipDeviceGetByPCIBusId.device) data->args.hipDeviceGetByPCIBusId.device__val = *(data->args.hipDeviceGetByPCIBusId.device);
-      if (data->args.hipDeviceGetByPCIBusId.pciBusId) data->args.hipDeviceGetByPCIBusId.pciBusId__val = *(data->args.hipDeviceGetByPCIBusId.pciBusId);
-      break;
-// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')]
-    case HIP_API_ID_hipIpcGetMemHandle:
-      if (data->args.hipIpcGetMemHandle.handle) data->args.hipIpcGetMemHandle.handle__val = *(data->args.hipIpcGetMemHandle.handle);
-      break;
-// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyHtoDAsync:
-      break;
-// hipCtxGetDevice[('hipDevice_t*', 'device')]
-    case HIP_API_ID_hipCtxGetDevice:
-      if (data->args.hipCtxGetDevice.device) data->args.hipCtxGetDevice.device__val = *(data->args.hipCtxGetDevice.device);
-      break;
-// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
-    case HIP_API_ID_hipMemcpyDtoD:
-      break;
-// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')]
-    case HIP_API_ID_hipModuleLoadData:
-      if (data->args.hipModuleLoadData.module) data->args.hipModuleLoadData.module__val = *(data->args.hipModuleLoadData.module);
-      break;
-// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')]
-    case HIP_API_ID_hipDevicePrimaryCtxRelease:
-      break;
-// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')]
-    case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor:
-      if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks);
-      break;
-// hipCtxSetCurrent[('hipCtx_t', 'ctx')]
-    case HIP_API_ID_hipCtxSetCurrent:
-      break;
-// hipGetErrorString[]
-    case HIP_API_ID_hipGetErrorString:
-      break;
-// hipStreamCreate[('hipStream_t*', 'stream')]
-    case HIP_API_ID_hipStreamCreate:
-      if (data->args.hipStreamCreate.stream) data->args.hipStreamCreate.stream__val = *(data->args.hipStreamCreate.stream);
-      break;
-// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')]
-    case HIP_API_ID_hipDevicePrimaryCtxRetain:
-      if (data->args.hipDevicePrimaryCtxRetain.pctx) data->args.hipDevicePrimaryCtxRetain.pctx__val = *(data->args.hipDevicePrimaryCtxRetain.pctx);
-      break;
-// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')]
-    case HIP_API_ID_hipDeviceGet:
-      if (data->args.hipDeviceGet.device) data->args.hipDeviceGet.device__val = *(data->args.hipDeviceGet.device);
-      break;
-// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipStreamCreateWithFlags:
-      if (data->args.hipStreamCreateWithFlags.stream) data->args.hipStreamCreateWithFlags.stream__val = *(data->args.hipStreamCreateWithFlags.stream);
-      break;
-// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpyFromArray:
-      break;
-// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpy2DAsync:
-      break;
-// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')]
-    case HIP_API_ID_hipFuncGetAttributes:
-      if (data->args.hipFuncGetAttributes.attr) data->args.hipFuncGetAttributes.attr__val = *(data->args.hipFuncGetAttributes.attr);
-      break;
-// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')]
-    case HIP_API_ID_hipGetSymbolSize:
-      if (data->args.hipGetSymbolSize.size) data->args.hipGetSymbolSize.size__val = *(data->args.hipGetSymbolSize.size);
-      break;
-// hipHostFree[('void*', 'ptr')]
-    case HIP_API_ID_hipHostFree:
-      break;
-// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipEventCreateWithFlags:
-      if (data->args.hipEventCreateWithFlags.event) data->args.hipEventCreateWithFlags.event__val = *(data->args.hipEventCreateWithFlags.event);
-      break;
-// hipStreamQuery[('hipStream_t', 'stream')]
-    case HIP_API_ID_hipStreamQuery:
-      break;
-// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')]
-    case HIP_API_ID_hipMemcpy3D:
-      if (data->args.hipMemcpy3D.p) data->args.hipMemcpy3D.p__val = *(data->args.hipMemcpy3D.p);
-      break;
-// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpyToSymbol:
-      break;
-// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpy:
-      break;
-// hipPeekAtLastError[]
-    case HIP_API_ID_hipPeekAtLastError:
-      break;
-// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice:
-      if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList) data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val = *(data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList);
-      break;
-// hipHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipHostAlloc:
-      if (data->args.hipHostAlloc.ptr) data->args.hipHostAlloc.ptr__val = *(data->args.hipHostAlloc.ptr);
-      break;
-// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipStreamAddCallback:
-      break;
-// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')]
-    case HIP_API_ID_hipMemcpyToArray:
-      if (data->args.hipMemcpyToArray.dst) data->args.hipMemcpyToArray.dst__val = *(data->args.hipMemcpyToArray.dst);
-      break;
-// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')]
-    case HIP_API_ID_hipMemsetD32:
-      break;
-// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipExtModuleLaunchKernel:
-      if (data->args.hipExtModuleLaunchKernel.kernelParams) data->args.hipExtModuleLaunchKernel.kernelParams__val = *(data->args.hipExtModuleLaunchKernel.kernelParams);
-      if (data->args.hipExtModuleLaunchKernel.extra) data->args.hipExtModuleLaunchKernel.extra__val = *(data->args.hipExtModuleLaunchKernel.extra);
-      break;
-// hipDeviceSynchronize[]
-    case HIP_API_ID_hipDeviceSynchronize:
-      break;
-// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')]
-    case HIP_API_ID_hipDeviceGetCacheConfig:
-      if (data->args.hipDeviceGetCacheConfig.cacheConfig) data->args.hipDeviceGetCacheConfig.cacheConfig__val = *(data->args.hipDeviceGetCacheConfig.cacheConfig);
-      break;
-// hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')]
-    case HIP_API_ID_hipMalloc3D:
-      if (data->args.hipMalloc3D.pitchedDevPtr) data->args.hipMalloc3D.pitchedDevPtr__val = *(data->args.hipMalloc3D.pitchedDevPtr);
-      break;
-// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')]
-    case HIP_API_ID_hipPointerGetAttributes:
-      if (data->args.hipPointerGetAttributes.attributes) data->args.hipPointerGetAttributes.attributes__val = *(data->args.hipPointerGetAttributes.attributes);
-      break;
-// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemsetAsync:
-      break;
-// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')]
-    case HIP_API_ID_hipDeviceGetName:
-      data->args.hipDeviceGetName.name = (data->args.hipDeviceGetName.name) ? strdup(data->args.hipDeviceGetName.name) : NULL;
-      break;
-// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags:
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize);
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize);
-      break;
-// hipCtxPushCurrent[('hipCtx_t', 'ctx')]
-    case HIP_API_ID_hipCtxPushCurrent:
-      break;
-// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')]
-    case HIP_API_ID_hipMemcpyPeer:
-      break;
-// hipEventSynchronize[('hipEvent_t', 'event')]
-    case HIP_API_ID_hipEventSynchronize:
-      break;
-// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyDtoDAsync:
-      break;
-// hipProfilerStart[]
-    case HIP_API_ID_hipProfilerStart:
-      break;
-// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipExtMallocWithFlags:
-      if (data->args.hipExtMallocWithFlags.ptr) data->args.hipExtMallocWithFlags.ptr__val = *(data->args.hipExtMallocWithFlags.ptr);
-      break;
-// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipCtxEnablePeerAccess:
-      break;
-// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')]
-    case HIP_API_ID_hipMemAllocHost:
-      if (data->args.hipMemAllocHost.ptr) data->args.hipMemAllocHost.ptr__val = *(data->args.hipMemAllocHost.ptr);
-      break;
-// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyDtoHAsync:
-      break;
-// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')]
-    case HIP_API_ID_hipModuleLaunchKernel:
-      if (data->args.hipModuleLaunchKernel.kernelParams) data->args.hipModuleLaunchKernel.kernelParams__val = *(data->args.hipModuleLaunchKernel.kernelParams);
-      if (data->args.hipModuleLaunchKernel.extra) data->args.hipModuleLaunchKernel.extra__val = *(data->args.hipModuleLaunchKernel.extra);
-      break;
-// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')]
-    case HIP_API_ID_hipMemAllocPitch:
-      if (data->args.hipMemAllocPitch.dptr) data->args.hipMemAllocPitch.dptr__val = *(data->args.hipMemAllocPitch.dptr);
-      if (data->args.hipMemAllocPitch.pitch) data->args.hipMemAllocPitch.pitch__val = *(data->args.hipMemAllocPitch.pitch);
-      break;
-// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')]
-    case HIP_API_ID_hipExtLaunchKernel:
-      if (data->args.hipExtLaunchKernel.args) data->args.hipExtLaunchKernel.args__val = *(data->args.hipExtLaunchKernel.args);
-      break;
-// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpy2DFromArrayAsync:
-      break;
-// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')]
-    case HIP_API_ID_hipDeviceGetLimit:
-      if (data->args.hipDeviceGetLimit.pValue) data->args.hipDeviceGetLimit.pValue__val = *(data->args.hipDeviceGetLimit.pValue);
-      break;
-// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')]
-    case HIP_API_ID_hipModuleLoadDataEx:
-      if (data->args.hipModuleLoadDataEx.module) data->args.hipModuleLoadDataEx.module__val = *(data->args.hipModuleLoadDataEx.module);
-      if (data->args.hipModuleLoadDataEx.options) data->args.hipModuleLoadDataEx.options__val = *(data->args.hipModuleLoadDataEx.options);
-      if (data->args.hipModuleLoadDataEx.optionsValues) data->args.hipModuleLoadDataEx.optionsValues__val = *(data->args.hipModuleLoadDataEx.optionsValues);
-      break;
-// hipRuntimeGetVersion[('int*', 'runtimeVersion')]
-    case HIP_API_ID_hipRuntimeGetVersion:
-      if (data->args.hipRuntimeGetVersion.runtimeVersion) data->args.hipRuntimeGetVersion.runtimeVersion__val = *(data->args.hipRuntimeGetVersion.runtimeVersion);
-      break;
-// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
-    case HIP_API_ID_hipMemRangeGetAttribute:
-      break;
-// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')]
-    case HIP_API_ID_hipDeviceGetP2PAttribute:
-      if (data->args.hipDeviceGetP2PAttribute.value) data->args.hipDeviceGetP2PAttribute.value__val = *(data->args.hipDeviceGetP2PAttribute.value);
-      break;
-// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyPeerAsync:
-      break;
-// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
-    case HIP_API_ID_hipGetDeviceProperties:
-      if (data->args.hipGetDeviceProperties.props) data->args.hipGetDeviceProperties.props__val = *(data->args.hipGetDeviceProperties.props);
-      break;
-// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')]
-    case HIP_API_ID_hipMemcpyDtoH:
-      break;
-// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemcpyWithStream:
-      break;
-// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')]
-    case HIP_API_ID_hipDeviceTotalMem:
-      if (data->args.hipDeviceTotalMem.bytes) data->args.hipDeviceTotalMem.bytes__val = *(data->args.hipDeviceTotalMem.bytes);
-      break;
-// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')]
-    case HIP_API_ID_hipHostGetDevicePointer:
-      if (data->args.hipHostGetDevicePointer.devPtr) data->args.hipHostGetDevicePointer.devPtr__val = *(data->args.hipHostGetDevicePointer.devPtr);
-      break;
-// hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')]
-    case HIP_API_ID_hipMemRangeGetAttributes:
-      if (data->args.hipMemRangeGetAttributes.data) data->args.hipMemRangeGetAttributes.data__val = *(data->args.hipMemRangeGetAttributes.data);
-      if (data->args.hipMemRangeGetAttributes.data_sizes) data->args.hipMemRangeGetAttributes.data_sizes__val = *(data->args.hipMemRangeGetAttributes.data_sizes);
-      if (data->args.hipMemRangeGetAttributes.attributes) data->args.hipMemRangeGetAttributes.attributes__val = *(data->args.hipMemRangeGetAttributes.attributes);
-      break;
-// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')]
-    case HIP_API_ID_hipMemcpyParam2D:
-      if (data->args.hipMemcpyParam2D.pCopy) data->args.hipMemcpyParam2D.pCopy__val = *(data->args.hipMemcpyParam2D.pCopy);
-      break;
-// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')]
-    case HIP_API_ID_hipDevicePrimaryCtxReset:
-      break;
-// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')]
-    case HIP_API_ID_hipGetMipmappedArrayLevel:
-      if (data->args.hipGetMipmappedArrayLevel.levelArray) data->args.hipGetMipmappedArrayLevel.levelArray__val = *(data->args.hipGetMipmappedArrayLevel.levelArray);
-      break;
-// hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')]
-    case HIP_API_ID_hipMemsetD32Async:
-      break;
-// hipGetDevice[('int*', 'deviceId')]
-    case HIP_API_ID_hipGetDevice:
-      if (data->args.hipGetDevice.deviceId) data->args.hipGetDevice.deviceId__val = *(data->args.hipGetDevice.deviceId);
-      break;
-// hipGetDeviceCount[('int*', 'count')]
-    case HIP_API_ID_hipGetDeviceCount:
-      if (data->args.hipGetDeviceCount.count) data->args.hipGetDeviceCount.count__val = *(data->args.hipGetDeviceCount.count);
-      break;
-// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')]
-    case HIP_API_ID_hipIpcOpenEventHandle:
-      if (data->args.hipIpcOpenEventHandle.event) data->args.hipIpcOpenEventHandle.event__val = *(data->args.hipIpcOpenEventHandle.event);
-      break;
-    default: break;
-  };
-}
-
-#include <sstream>
-#include <string>
-// HIP API string method, method name and parameters
-static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {
-  std::ostringstream oss;
-  switch (id) {
-    case HIP_API_ID_hipDrvMemcpy3DAsync:
-      oss << "hipDrvMemcpy3DAsync(";
-      if (data->args.hipDrvMemcpy3DAsync.pCopy == NULL) oss << "pCopy=NULL";
-      else oss << "pCopy=" << data->args.hipDrvMemcpy3DAsync.pCopy__val;
-      oss << ", stream=" << data->args.hipDrvMemcpy3DAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceEnablePeerAccess:
-      oss << "hipDeviceEnablePeerAccess(";
-      oss << "peerDeviceId=" << data->args.hipDeviceEnablePeerAccess.peerDeviceId;
-      oss << ", flags=" << data->args.hipDeviceEnablePeerAccess.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFuncSetSharedMemConfig:
-      oss << "hipFuncSetSharedMemConfig(";
-      oss << "func=" << data->args.hipFuncSetSharedMemConfig.func;
-      oss << ", config=" << data->args.hipFuncSetSharedMemConfig.config;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyToSymbolAsync:
-      oss << "hipMemcpyToSymbolAsync(";
-      oss << "symbol=" << data->args.hipMemcpyToSymbolAsync.symbol;
-      oss << ", src=" << data->args.hipMemcpyToSymbolAsync.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyToSymbolAsync.sizeBytes;
-      oss << ", offset=" << data->args.hipMemcpyToSymbolAsync.offset;
-      oss << ", kind=" << data->args.hipMemcpyToSymbolAsync.kind;
-      oss << ", stream=" << data->args.hipMemcpyToSymbolAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMallocPitch:
-      oss << "hipMallocPitch(";
-      if (data->args.hipMallocPitch.ptr == NULL) oss << "ptr=NULL";
-      else oss << "ptr=" << data->args.hipMallocPitch.ptr__val;
-      if (data->args.hipMallocPitch.pitch == NULL) oss << ", pitch=NULL";
-      else oss << ", pitch=" << data->args.hipMallocPitch.pitch__val;
-      oss << ", width=" << data->args.hipMallocPitch.width;
-      oss << ", height=" << data->args.hipMallocPitch.height;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMalloc:
-      oss << "hipMalloc(";
-      if (data->args.hipMalloc.ptr == NULL) oss << "ptr=NULL";
-      else oss << "ptr=" << data->args.hipMalloc.ptr__val;
-      oss << ", size=" << data->args.hipMalloc.size;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemsetD16:
-      oss << "hipMemsetD16(";
-      oss << "dest=" << data->args.hipMemsetD16.dest;
-      oss << ", value=" << data->args.hipMemsetD16.value;
-      oss << ", count=" << data->args.hipMemsetD16.count;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipExtStreamGetCUMask:
-      oss << "hipExtStreamGetCUMask(";
-      oss << "stream=" << data->args.hipExtStreamGetCUMask.stream;
-      oss << ", cuMaskSize=" << data->args.hipExtStreamGetCUMask.cuMaskSize;
-      if (data->args.hipExtStreamGetCUMask.cuMask == NULL) oss << ", cuMask=NULL";
-      else oss << ", cuMask=" << data->args.hipExtStreamGetCUMask.cuMask__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipEventRecord:
-      oss << "hipEventRecord(";
-      oss << "event=" << data->args.hipEventRecord.event;
-      oss << ", stream=" << data->args.hipEventRecord.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxSynchronize:
-      oss << "hipCtxSynchronize(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipSetDevice:
-      oss << "hipSetDevice(";
-      oss << "deviceId=" << data->args.hipSetDevice.deviceId;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxGetApiVersion:
-      oss << "hipCtxGetApiVersion(";
-      oss << "ctx=" << data->args.hipCtxGetApiVersion.ctx;
-      if (data->args.hipCtxGetApiVersion.apiVersion == NULL) oss << ", apiVersion=NULL";
-      else oss << ", apiVersion=" << data->args.hipCtxGetApiVersion.apiVersion__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyFromSymbolAsync:
-      oss << "hipMemcpyFromSymbolAsync(";
-      oss << "dst=" << data->args.hipMemcpyFromSymbolAsync.dst;
-      oss << ", symbol=" << data->args.hipMemcpyFromSymbolAsync.symbol;
-      oss << ", sizeBytes=" << data->args.hipMemcpyFromSymbolAsync.sizeBytes;
-      oss << ", offset=" << data->args.hipMemcpyFromSymbolAsync.offset;
-      oss << ", kind=" << data->args.hipMemcpyFromSymbolAsync.kind;
-      oss << ", stream=" << data->args.hipMemcpyFromSymbolAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipExtGetLinkTypeAndHopCount:
-      oss << "hipExtGetLinkTypeAndHopCount(";
-      oss << "device1=" << data->args.hipExtGetLinkTypeAndHopCount.device1;
-      oss << ", device2=" << data->args.hipExtGetLinkTypeAndHopCount.device2;
-      if (data->args.hipExtGetLinkTypeAndHopCount.linktype == NULL) oss << ", linktype=NULL";
-      else oss << ", linktype=" << data->args.hipExtGetLinkTypeAndHopCount.linktype__val;
-      if (data->args.hipExtGetLinkTypeAndHopCount.hopcount == NULL) oss << ", hopcount=NULL";
-      else oss << ", hopcount=" << data->args.hipExtGetLinkTypeAndHopCount.hopcount__val;
-      oss << ")";
-    break;
-    case HIP_API_ID___hipPopCallConfiguration:
-      oss << "__hipPopCallConfiguration(";
-      if (data->args.__hipPopCallConfiguration.gridDim == NULL) oss << "gridDim=NULL";
-      else oss << "gridDim=" << data->args.__hipPopCallConfiguration.gridDim__val;
-      if (data->args.__hipPopCallConfiguration.blockDim == NULL) oss << ", blockDim=NULL";
-      else oss << ", blockDim=" << data->args.__hipPopCallConfiguration.blockDim__val;
-      if (data->args.__hipPopCallConfiguration.sharedMem == NULL) oss << ", sharedMem=NULL";
-      else oss << ", sharedMem=" << data->args.__hipPopCallConfiguration.sharedMem__val;
-      if (data->args.__hipPopCallConfiguration.stream == NULL) oss << ", stream=NULL";
-      else oss << ", stream=" << data->args.__hipPopCallConfiguration.stream__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor:
-      oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(";
-      if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL";
-      else oss << "numBlocks=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val;
-      oss << ", f=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f;
-      oss << ", blockSize=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize;
-      oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemset3D:
-      oss << "hipMemset3D(";
-      oss << "pitchedDevPtr=" << data->args.hipMemset3D.pitchedDevPtr;
-      oss << ", value=" << data->args.hipMemset3D.value;
-      oss << ", extent=" << data->args.hipMemset3D.extent;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamCreateWithPriority:
-      oss << "hipStreamCreateWithPriority(";
-      if (data->args.hipStreamCreateWithPriority.stream == NULL) oss << "stream=NULL";
-      else oss << "stream=" << data->args.hipStreamCreateWithPriority.stream__val;
-      oss << ", flags=" << data->args.hipStreamCreateWithPriority.flags;
-      oss << ", priority=" << data->args.hipStreamCreateWithPriority.priority;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy2DToArray:
-      oss << "hipMemcpy2DToArray(";
-      if (data->args.hipMemcpy2DToArray.dst == NULL) oss << "dst=NULL";
-      else oss << "dst=" << data->args.hipMemcpy2DToArray.dst__val;
-      oss << ", wOffset=" << data->args.hipMemcpy2DToArray.wOffset;
-      oss << ", hOffset=" << data->args.hipMemcpy2DToArray.hOffset;
-      oss << ", src=" << data->args.hipMemcpy2DToArray.src;
-      oss << ", spitch=" << data->args.hipMemcpy2DToArray.spitch;
-      oss << ", width=" << data->args.hipMemcpy2DToArray.width;
-      oss << ", height=" << data->args.hipMemcpy2DToArray.height;
-      oss << ", kind=" << data->args.hipMemcpy2DToArray.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemsetD8Async:
-      oss << "hipMemsetD8Async(";
-      oss << "dest=" << data->args.hipMemsetD8Async.dest;
-      oss << ", value=" << data->args.hipMemsetD8Async.value;
-      oss << ", count=" << data->args.hipMemsetD8Async.count;
-      oss << ", stream=" << data->args.hipMemsetD8Async.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxGetCacheConfig:
-      oss << "hipCtxGetCacheConfig(";
-      if (data->args.hipCtxGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL";
-      else oss << "cacheConfig=" << data->args.hipCtxGetCacheConfig.cacheConfig__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleGetFunction:
-      oss << "hipModuleGetFunction(";
-      if (data->args.hipModuleGetFunction.function == NULL) oss << "function=NULL";
-      else oss << "function=" << data->args.hipModuleGetFunction.function__val;
-      oss << ", module=" << data->args.hipModuleGetFunction.module;
-      if (data->args.hipModuleGetFunction.kname == NULL) oss << ", kname=NULL";
-      else oss << ", kname=" << data->args.hipModuleGetFunction.kname__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamWaitEvent:
-      oss << "hipStreamWaitEvent(";
-      oss << "stream=" << data->args.hipStreamWaitEvent.stream;
-      oss << ", event=" << data->args.hipStreamWaitEvent.event;
-      oss << ", flags=" << data->args.hipStreamWaitEvent.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetStreamPriorityRange:
-      oss << "hipDeviceGetStreamPriorityRange(";
-      if (data->args.hipDeviceGetStreamPriorityRange.leastPriority == NULL) oss << "leastPriority=NULL";
-      else oss << "leastPriority=" << data->args.hipDeviceGetStreamPriorityRange.leastPriority__val;
-      if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority == NULL) oss << ", greatestPriority=NULL";
-      else oss << ", greatestPriority=" << data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleLoad:
-      oss << "hipModuleLoad(";
-      if (data->args.hipModuleLoad.module == NULL) oss << "module=NULL";
-      else oss << "module=" << data->args.hipModuleLoad.module__val;
-      if (data->args.hipModuleLoad.fname == NULL) oss << ", fname=NULL";
-      else oss << ", fname=" << data->args.hipModuleLoad.fname__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDevicePrimaryCtxSetFlags:
-      oss << "hipDevicePrimaryCtxSetFlags(";
-      oss << "dev=" << data->args.hipDevicePrimaryCtxSetFlags.dev;
-      oss << ", flags=" << data->args.hipDevicePrimaryCtxSetFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipLaunchCooperativeKernel:
-      oss << "hipLaunchCooperativeKernel(";
-      oss << "f=" << data->args.hipLaunchCooperativeKernel.f;
-      oss << ", gridDim=" << data->args.hipLaunchCooperativeKernel.gridDim;
-      oss << ", blockDimX=" << data->args.hipLaunchCooperativeKernel.blockDimX;
-      if (data->args.hipLaunchCooperativeKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
-      else oss << ", kernelParams=" << data->args.hipLaunchCooperativeKernel.kernelParams__val;
-      oss << ", sharedMemBytes=" << data->args.hipLaunchCooperativeKernel.sharedMemBytes;
-      oss << ", stream=" << data->args.hipLaunchCooperativeKernel.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice:
-      oss << "hipLaunchCooperativeKernelMultiDevice(";
-      if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL";
-      else oss << "launchParamsList=" << data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val;
-      oss << ", numDevices=" << data->args.hipLaunchCooperativeKernelMultiDevice.numDevices;
-      oss << ", flags=" << data->args.hipLaunchCooperativeKernelMultiDevice.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyAsync:
-      oss << "hipMemcpyAsync(";
-      oss << "dst=" << data->args.hipMemcpyAsync.dst;
-      oss << ", src=" << data->args.hipMemcpyAsync.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyAsync.sizeBytes;
-      oss << ", kind=" << data->args.hipMemcpyAsync.kind;
-      oss << ", stream=" << data->args.hipMemcpyAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMalloc3DArray:
-      oss << "hipMalloc3DArray(";
-      if (data->args.hipMalloc3DArray.array == NULL) oss << "array=NULL";
-      else oss << "array=" << data->args.hipMalloc3DArray.array__val;
-      if (data->args.hipMalloc3DArray.desc == NULL) oss << ", desc=NULL";
-      else oss << ", desc=" << data->args.hipMalloc3DArray.desc__val;
-      oss << ", extent=" << data->args.hipMalloc3DArray.extent;
-      oss << ", flags=" << data->args.hipMalloc3DArray.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMallocHost:
-      oss << "hipMallocHost(";
-      if (data->args.hipMallocHost.ptr == NULL) oss << "ptr=NULL";
-      else oss << "ptr=" << data->args.hipMallocHost.ptr__val;
-      oss << ", size=" << data->args.hipMallocHost.size;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxGetCurrent:
-      oss << "hipCtxGetCurrent(";
-      if (data->args.hipCtxGetCurrent.ctx == NULL) oss << "ctx=NULL";
-      else oss << "ctx=" << data->args.hipCtxGetCurrent.ctx__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDevicePrimaryCtxGetState:
-      oss << "hipDevicePrimaryCtxGetState(";
-      oss << "dev=" << data->args.hipDevicePrimaryCtxGetState.dev;
-      if (data->args.hipDevicePrimaryCtxGetState.flags == NULL) oss << ", flags=NULL";
-      else oss << ", flags=" << data->args.hipDevicePrimaryCtxGetState.flags__val;
-      if (data->args.hipDevicePrimaryCtxGetState.active == NULL) oss << ", active=NULL";
-      else oss << ", active=" << data->args.hipDevicePrimaryCtxGetState.active__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipEventQuery:
-      oss << "hipEventQuery(";
-      oss << "event=" << data->args.hipEventQuery.event;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipEventCreate:
-      oss << "hipEventCreate(";
-      if (data->args.hipEventCreate.event == NULL) oss << "event=NULL";
-      else oss << "event=" << data->args.hipEventCreate.event__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemGetAddressRange:
-      oss << "hipMemGetAddressRange(";
-      if (data->args.hipMemGetAddressRange.pbase == NULL) oss << "pbase=NULL";
-      else oss << "pbase=" << data->args.hipMemGetAddressRange.pbase__val;
-      if (data->args.hipMemGetAddressRange.psize == NULL) oss << ", psize=NULL";
-      else oss << ", psize=" << data->args.hipMemGetAddressRange.psize__val;
-      oss << ", dptr=" << data->args.hipMemGetAddressRange.dptr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyFromSymbol:
-      oss << "hipMemcpyFromSymbol(";
-      oss << "dst=" << data->args.hipMemcpyFromSymbol.dst;
-      oss << ", symbol=" << data->args.hipMemcpyFromSymbol.symbol;
-      oss << ", sizeBytes=" << data->args.hipMemcpyFromSymbol.sizeBytes;
-      oss << ", offset=" << data->args.hipMemcpyFromSymbol.offset;
-      oss << ", kind=" << data->args.hipMemcpyFromSymbol.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipArrayCreate:
-      oss << "hipArrayCreate(";
-      if (data->args.hipArrayCreate.pHandle == NULL) oss << "pHandle=NULL";
-      else oss << "pHandle=" << (void*)data->args.hipArrayCreate.pHandle__val;
-      if (data->args.hipArrayCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL";
-      else oss << ", pAllocateArray=" << data->args.hipArrayCreate.pAllocateArray__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamAttachMemAsync:
-      oss << "hipStreamAttachMemAsync(";
-      oss << "stream=" << data->args.hipStreamAttachMemAsync.stream;
-      if (data->args.hipStreamAttachMemAsync.dev_ptr == NULL) oss << ", dev_ptr=NULL";
-      else oss << ", dev_ptr=" << data->args.hipStreamAttachMemAsync.dev_ptr__val;
-      oss << ", length=" << data->args.hipStreamAttachMemAsync.length;
-      oss << ", flags=" << data->args.hipStreamAttachMemAsync.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamGetFlags:
-      oss << "hipStreamGetFlags(";
-      oss << "stream=" << data->args.hipStreamGetFlags.stream;
-      if (data->args.hipStreamGetFlags.flags == NULL) oss << ", flags=NULL";
-      else oss << ", flags=" << data->args.hipStreamGetFlags.flags__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMallocArray:
-      oss << "hipMallocArray(";
-      if (data->args.hipMallocArray.array == NULL) oss << "array=NULL";
-      else oss << "array=" << (void*)data->args.hipMallocArray.array__val;
-      if (data->args.hipMallocArray.desc == NULL) oss << ", desc=NULL";
-      else oss << ", desc=" << data->args.hipMallocArray.desc__val;
-      oss << ", width=" << data->args.hipMallocArray.width;
-      oss << ", height=" << data->args.hipMallocArray.height;
-      oss << ", flags=" << data->args.hipMallocArray.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxGetSharedMemConfig:
-      oss << "hipCtxGetSharedMemConfig(";
-      if (data->args.hipCtxGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL";
-      else oss << "pConfig=" << data->args.hipCtxGetSharedMemConfig.pConfig__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceDisablePeerAccess:
-      oss << "hipDeviceDisablePeerAccess(";
-      oss << "peerDeviceId=" << data->args.hipDeviceDisablePeerAccess.peerDeviceId;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize:
-      oss << "hipModuleOccupancyMaxPotentialBlockSize(";
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL";
-      else oss << "gridSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val;
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL";
-      else oss << ", blockSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val;
-      oss << ", f=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.f;
-      oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk;
-      oss << ", blockSizeLimit=" << data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemPtrGetInfo:
-      oss << "hipMemPtrGetInfo(";
-      oss << "ptr=" << data->args.hipMemPtrGetInfo.ptr;
-      if (data->args.hipMemPtrGetInfo.size == NULL) oss << ", size=NULL";
-      else oss << ", size=" << data->args.hipMemPtrGetInfo.size__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFuncGetAttribute:
-      oss << "hipFuncGetAttribute(";
-      if (data->args.hipFuncGetAttribute.value == NULL) oss << "value=NULL";
-      else oss << "value=" << data->args.hipFuncGetAttribute.value__val;
-      oss << ", attrib=" << data->args.hipFuncGetAttribute.attrib;
-      oss << ", hfunc=" << data->args.hipFuncGetAttribute.hfunc;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxGetFlags:
-      oss << "hipCtxGetFlags(";
-      if (data->args.hipCtxGetFlags.flags == NULL) oss << "flags=NULL";
-      else oss << "flags=" << data->args.hipCtxGetFlags.flags__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamDestroy:
-      oss << "hipStreamDestroy(";
-      oss << "stream=" << data->args.hipStreamDestroy.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID___hipPushCallConfiguration:
-      oss << "__hipPushCallConfiguration(";
-      oss << "gridDim=" << data->args.__hipPushCallConfiguration.gridDim;
-      oss << ", blockDim=" << data->args.__hipPushCallConfiguration.blockDim;
-      oss << ", sharedMem=" << data->args.__hipPushCallConfiguration.sharedMem;
-      oss << ", stream=" << data->args.__hipPushCallConfiguration.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemset3DAsync:
-      oss << "hipMemset3DAsync(";
-      oss << "pitchedDevPtr=" << data->args.hipMemset3DAsync.pitchedDevPtr;
-      oss << ", value=" << data->args.hipMemset3DAsync.value;
-      oss << ", extent=" << data->args.hipMemset3DAsync.extent;
-      oss << ", stream=" << data->args.hipMemset3DAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetPCIBusId:
-      oss << "hipDeviceGetPCIBusId(";
-      if (data->args.hipDeviceGetPCIBusId.pciBusId == NULL) oss << "pciBusId=NULL";
-      else oss << "pciBusId=" << data->args.hipDeviceGetPCIBusId.pciBusId__val;
-      oss << ", len=" << data->args.hipDeviceGetPCIBusId.len;
-      oss << ", device=" << data->args.hipDeviceGetPCIBusId.device;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipInit:
-      oss << "hipInit(";
-      oss << "flags=" << data->args.hipInit.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyAtoH:
-      oss << "hipMemcpyAtoH(";
-      oss << "dst=" << data->args.hipMemcpyAtoH.dst;
-      if (data->args.hipMemcpyAtoH.srcArray == NULL) oss << ", srcArray=NULL";
-      else oss << ", srcArray=" << data->args.hipMemcpyAtoH.srcArray__val;
-      oss << ", srcOffset=" << data->args.hipMemcpyAtoH.srcOffset;
-      oss << ", count=" << data->args.hipMemcpyAtoH.count;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamGetPriority:
-      oss << "hipStreamGetPriority(";
-      oss << "stream=" << data->args.hipStreamGetPriority.stream;
-      if (data->args.hipStreamGetPriority.priority == NULL) oss << ", priority=NULL";
-      else oss << ", priority=" << data->args.hipStreamGetPriority.priority__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemset2D:
-      oss << "hipMemset2D(";
-      oss << "dst=" << data->args.hipMemset2D.dst;
-      oss << ", pitch=" << data->args.hipMemset2D.pitch;
-      oss << ", value=" << data->args.hipMemset2D.value;
-      oss << ", width=" << data->args.hipMemset2D.width;
-      oss << ", height=" << data->args.hipMemset2D.height;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemset2DAsync:
-      oss << "hipMemset2DAsync(";
-      oss << "dst=" << data->args.hipMemset2DAsync.dst;
-      oss << ", pitch=" << data->args.hipMemset2DAsync.pitch;
-      oss << ", value=" << data->args.hipMemset2DAsync.value;
-      oss << ", width=" << data->args.hipMemset2DAsync.width;
-      oss << ", height=" << data->args.hipMemset2DAsync.height;
-      oss << ", stream=" << data->args.hipMemset2DAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceCanAccessPeer:
-      oss << "hipDeviceCanAccessPeer(";
-      if (data->args.hipDeviceCanAccessPeer.canAccessPeer == NULL) oss << "canAccessPeer=NULL";
-      else oss << "canAccessPeer=" << data->args.hipDeviceCanAccessPeer.canAccessPeer__val;
-      oss << ", deviceId=" << data->args.hipDeviceCanAccessPeer.deviceId;
-      oss << ", peerDeviceId=" << data->args.hipDeviceCanAccessPeer.peerDeviceId;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipLaunchByPtr:
-      oss << "hipLaunchByPtr(";
-      oss << "hostFunction=" << data->args.hipLaunchByPtr.hostFunction;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemPrefetchAsync:
-      oss << "hipMemPrefetchAsync(";
-      oss << "dev_ptr=" << data->args.hipMemPrefetchAsync.dev_ptr;
-      oss << ", count=" << data->args.hipMemPrefetchAsync.count;
-      oss << ", device=" << data->args.hipMemPrefetchAsync.device;
-      oss << ", stream=" << data->args.hipMemPrefetchAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxDestroy:
-      oss << "hipCtxDestroy(";
-      oss << "ctx=" << data->args.hipCtxDestroy.ctx;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemsetD16Async:
-      oss << "hipMemsetD16Async(";
-      oss << "dest=" << data->args.hipMemsetD16Async.dest;
-      oss << ", value=" << data->args.hipMemsetD16Async.value;
-      oss << ", count=" << data->args.hipMemsetD16Async.count;
-      oss << ", stream=" << data->args.hipMemsetD16Async.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleUnload:
-      oss << "hipModuleUnload(";
-      oss << "module=" << data->args.hipModuleUnload.module;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHostUnregister:
-      oss << "hipHostUnregister(";
-      oss << "hostPtr=" << data->args.hipHostUnregister.hostPtr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipProfilerStop:
-      oss << "hipProfilerStop(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipExtStreamCreateWithCUMask:
-      oss << "hipExtStreamCreateWithCUMask(";
-      if (data->args.hipExtStreamCreateWithCUMask.stream == NULL) oss << "stream=NULL";
-      else oss << "stream=" << data->args.hipExtStreamCreateWithCUMask.stream__val;
-      oss << ", cuMaskSize=" << data->args.hipExtStreamCreateWithCUMask.cuMaskSize;
-      if (data->args.hipExtStreamCreateWithCUMask.cuMask == NULL) oss << ", cuMask=NULL";
-      else oss << ", cuMask=" << data->args.hipExtStreamCreateWithCUMask.cuMask__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamSynchronize:
-      oss << "hipStreamSynchronize(";
-      oss << "stream=" << data->args.hipStreamSynchronize.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFreeHost:
-      oss << "hipFreeHost(";
-      oss << "ptr=" << data->args.hipFreeHost.ptr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceSetCacheConfig:
-      oss << "hipDeviceSetCacheConfig(";
-      oss << "cacheConfig=" << data->args.hipDeviceSetCacheConfig.cacheConfig;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetErrorName:
-      oss << "hipGetErrorName(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyHtoD:
-      oss << "hipMemcpyHtoD(";
-      oss << "dst=" << data->args.hipMemcpyHtoD.dst;
-      oss << ", src=" << data->args.hipMemcpyHtoD.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyHtoD.sizeBytes;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleGetGlobal:
-      oss << "hipModuleGetGlobal(";
-      if (data->args.hipModuleGetGlobal.dptr == NULL) oss << "dptr=NULL";
-      else oss << "dptr=" << data->args.hipModuleGetGlobal.dptr__val;
-      if (data->args.hipModuleGetGlobal.bytes == NULL) oss << ", bytes=NULL";
-      else oss << ", bytes=" << data->args.hipModuleGetGlobal.bytes__val;
-      oss << ", hmod=" << data->args.hipModuleGetGlobal.hmod;
-      if (data->args.hipModuleGetGlobal.name == NULL) oss << ", name=NULL";
-      else oss << ", name=" << data->args.hipModuleGetGlobal.name__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyHtoA:
-      oss << "hipMemcpyHtoA(";
-      if (data->args.hipMemcpyHtoA.dstArray == NULL) oss << "dstArray=NULL";
-      else oss << "dstArray=" << data->args.hipMemcpyHtoA.dstArray__val;
-      oss << ", dstOffset=" << data->args.hipMemcpyHtoA.dstOffset;
-      oss << ", srcHost=" << data->args.hipMemcpyHtoA.srcHost;
-      oss << ", count=" << data->args.hipMemcpyHtoA.count;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxCreate:
-      oss << "hipCtxCreate(";
-      if (data->args.hipCtxCreate.ctx == NULL) oss << "ctx=NULL";
-      else oss << "ctx=" << data->args.hipCtxCreate.ctx__val;
-      oss << ", flags=" << data->args.hipCtxCreate.flags;
-      oss << ", device=" << data->args.hipCtxCreate.device;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy2D:
-      oss << "hipMemcpy2D(";
-      oss << "dst=" << data->args.hipMemcpy2D.dst;
-      oss << ", dpitch=" << data->args.hipMemcpy2D.dpitch;
-      oss << ", src=" << data->args.hipMemcpy2D.src;
-      oss << ", spitch=" << data->args.hipMemcpy2D.spitch;
-      oss << ", width=" << data->args.hipMemcpy2D.width;
-      oss << ", height=" << data->args.hipMemcpy2D.height;
-      oss << ", kind=" << data->args.hipMemcpy2D.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipIpcCloseMemHandle:
-      oss << "hipIpcCloseMemHandle(";
-      oss << "devPtr=" << data->args.hipIpcCloseMemHandle.devPtr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipChooseDevice:
-      oss << "hipChooseDevice(";
-      if (data->args.hipChooseDevice.device == NULL) oss << "device=NULL";
-      else oss << "device=" << data->args.hipChooseDevice.device__val;
-      if (data->args.hipChooseDevice.prop == NULL) oss << ", prop=NULL";
-      else oss << ", prop=" << data->args.hipChooseDevice.prop__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceSetSharedMemConfig:
-      oss << "hipDeviceSetSharedMemConfig(";
-      oss << "config=" << data->args.hipDeviceSetSharedMemConfig.config;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMallocMipmappedArray:
-      oss << "hipMallocMipmappedArray(";
-      if (data->args.hipMallocMipmappedArray.mipmappedArray == NULL) oss << "mipmappedArray=NULL";
-      else oss << "mipmappedArray=" << data->args.hipMallocMipmappedArray.mipmappedArray__val;
-      if (data->args.hipMallocMipmappedArray.desc == NULL) oss << ", desc=NULL";
-      else oss << ", desc=" << data->args.hipMallocMipmappedArray.desc__val;
-      oss << ", extent=" << data->args.hipMallocMipmappedArray.extent;
-      oss << ", numLevels=" << data->args.hipMallocMipmappedArray.numLevels;
-      oss << ", flags=" << data->args.hipMallocMipmappedArray.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipSetupArgument:
-      oss << "hipSetupArgument(";
-      oss << "arg=" << data->args.hipSetupArgument.arg;
-      oss << ", size=" << data->args.hipSetupArgument.size;
-      oss << ", offset=" << data->args.hipSetupArgument.offset;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipIpcGetEventHandle:
-      oss << "hipIpcGetEventHandle(";
-      if (data->args.hipIpcGetEventHandle.handle == NULL) oss << "handle=NULL";
-      else oss << "handle=" << data->args.hipIpcGetEventHandle.handle__val;
-      oss << ", event=" << data->args.hipIpcGetEventHandle.event;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFreeArray:
-      oss << "hipFreeArray(";
-      if (data->args.hipFreeArray.array == NULL) oss << "array=NULL";
-      else oss << "array=" << data->args.hipFreeArray.array__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxSetCacheConfig:
-      oss << "hipCtxSetCacheConfig(";
-      oss << "cacheConfig=" << data->args.hipCtxSetCacheConfig.cacheConfig;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFuncSetCacheConfig:
-      oss << "hipFuncSetCacheConfig(";
-      oss << "func=" << data->args.hipFuncSetCacheConfig.func;
-      oss << ", config=" << data->args.hipFuncSetCacheConfig.config;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipLaunchKernel:
-      oss << "hipLaunchKernel(";
-      oss << "function_address=" << data->args.hipLaunchKernel.function_address;
-      oss << ", numBlocks=" << data->args.hipLaunchKernel.numBlocks;
-      oss << ", dimBlocks=" << data->args.hipLaunchKernel.dimBlocks;
-      if (data->args.hipLaunchKernel.args == NULL) oss << ", args=NULL";
-      else oss << ", args=" << data->args.hipLaunchKernel.args__val;
-      oss << ", sharedMemBytes=" << data->args.hipLaunchKernel.sharedMemBytes;
-      oss << ", stream=" << data->args.hipLaunchKernel.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
-      oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(";
-      if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL";
-      else oss << "numBlocks=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val;
-      oss << ", f=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f;
-      oss << ", blockSize=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize;
-      oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk;
-      oss << ", flags=" << data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleGetTexRef:
-      oss << "hipModuleGetTexRef(";
-      if (data->args.hipModuleGetTexRef.texRef == NULL) oss << "texRef=NULL";
-      else oss << "texRef=" << (void*)data->args.hipModuleGetTexRef.texRef__val;
-      oss << ", hmod=" << data->args.hipModuleGetTexRef.hmod;
-      if (data->args.hipModuleGetTexRef.name == NULL) oss << ", name=NULL";
-      else oss << ", name=" << data->args.hipModuleGetTexRef.name__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFuncSetAttribute:
-      oss << "hipFuncSetAttribute(";
-      oss << "func=" << data->args.hipFuncSetAttribute.func;
-      oss << ", attr=" << data->args.hipFuncSetAttribute.attr;
-      oss << ", value=" << data->args.hipFuncSetAttribute.value;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipEventElapsedTime:
-      oss << "hipEventElapsedTime(";
-      if (data->args.hipEventElapsedTime.ms == NULL) oss << "ms=NULL";
-      else oss << "ms=" << data->args.hipEventElapsedTime.ms__val;
-      oss << ", start=" << data->args.hipEventElapsedTime.start;
-      oss << ", stop=" << data->args.hipEventElapsedTime.stop;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipConfigureCall:
-      oss << "hipConfigureCall(";
-      oss << "gridDim=" << data->args.hipConfigureCall.gridDim;
-      oss << ", blockDim=" << data->args.hipConfigureCall.blockDim;
-      oss << ", sharedMem=" << data->args.hipConfigureCall.sharedMem;
-      oss << ", stream=" << data->args.hipConfigureCall.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemAdvise:
-      oss << "hipMemAdvise(";
-      oss << "dev_ptr=" << data->args.hipMemAdvise.dev_ptr;
-      oss << ", count=" << data->args.hipMemAdvise.count;
-      oss << ", advice=" << data->args.hipMemAdvise.advice;
-      oss << ", device=" << data->args.hipMemAdvise.device;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy3DAsync:
-      oss << "hipMemcpy3DAsync(";
-      if (data->args.hipMemcpy3DAsync.p == NULL) oss << "p=NULL";
-      else oss << "p=" << data->args.hipMemcpy3DAsync.p__val;
-      oss << ", stream=" << data->args.hipMemcpy3DAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipEventDestroy:
-      oss << "hipEventDestroy(";
-      oss << "event=" << data->args.hipEventDestroy.event;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxPopCurrent:
-      oss << "hipCtxPopCurrent(";
-      if (data->args.hipCtxPopCurrent.ctx == NULL) oss << "ctx=NULL";
-      else oss << "ctx=" << data->args.hipCtxPopCurrent.ctx__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetSymbolAddress:
-      oss << "hipGetSymbolAddress(";
-      if (data->args.hipGetSymbolAddress.devPtr == NULL) oss << "devPtr=NULL";
-      else oss << "devPtr=" << data->args.hipGetSymbolAddress.devPtr__val;
-      oss << ", symbol=" << data->args.hipGetSymbolAddress.symbol;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHostGetFlags:
-      oss << "hipHostGetFlags(";
-      if (data->args.hipHostGetFlags.flagsPtr == NULL) oss << "flagsPtr=NULL";
-      else oss << "flagsPtr=" << data->args.hipHostGetFlags.flagsPtr__val;
-      oss << ", hostPtr=" << data->args.hipHostGetFlags.hostPtr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHostMalloc:
-      oss << "hipHostMalloc(";
-      if (data->args.hipHostMalloc.ptr == NULL) oss << "ptr=NULL";
-      else oss << "ptr=" << data->args.hipHostMalloc.ptr__val;
-      oss << ", size=" << data->args.hipHostMalloc.size;
-      oss << ", flags=" << data->args.hipHostMalloc.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxSetSharedMemConfig:
-      oss << "hipCtxSetSharedMemConfig(";
-      oss << "config=" << data->args.hipCtxSetSharedMemConfig.config;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFreeMipmappedArray:
-      oss << "hipFreeMipmappedArray(";
-      oss << "mipmappedArray=" << data->args.hipFreeMipmappedArray.mipmappedArray;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemGetInfo:
-      oss << "hipMemGetInfo(";
-      if (data->args.hipMemGetInfo.free == NULL) oss << "free=NULL";
-      else oss << "free=" << data->args.hipMemGetInfo.free__val;
-      if (data->args.hipMemGetInfo.total == NULL) oss << ", total=NULL";
-      else oss << ", total=" << data->args.hipMemGetInfo.total__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceReset:
-      oss << "hipDeviceReset(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemset:
-      oss << "hipMemset(";
-      oss << "dst=" << data->args.hipMemset.dst;
-      oss << ", value=" << data->args.hipMemset.value;
-      oss << ", sizeBytes=" << data->args.hipMemset.sizeBytes;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemsetD8:
-      oss << "hipMemsetD8(";
-      oss << "dest=" << data->args.hipMemsetD8.dest;
-      oss << ", value=" << data->args.hipMemsetD8.value;
-      oss << ", count=" << data->args.hipMemsetD8.count;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyParam2DAsync:
-      oss << "hipMemcpyParam2DAsync(";
-      if (data->args.hipMemcpyParam2DAsync.pCopy == NULL) oss << "pCopy=NULL";
-      else oss << "pCopy=" << data->args.hipMemcpyParam2DAsync.pCopy__val;
-      oss << ", stream=" << data->args.hipMemcpyParam2DAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHostRegister:
-      oss << "hipHostRegister(";
-      oss << "hostPtr=" << data->args.hipHostRegister.hostPtr;
-      oss << ", sizeBytes=" << data->args.hipHostRegister.sizeBytes;
-      oss << ", flags=" << data->args.hipHostRegister.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDriverGetVersion:
-      oss << "hipDriverGetVersion(";
-      if (data->args.hipDriverGetVersion.driverVersion == NULL) oss << "driverVersion=NULL";
-      else oss << "driverVersion=" << data->args.hipDriverGetVersion.driverVersion__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipArray3DCreate:
-      oss << "hipArray3DCreate(";
-      if (data->args.hipArray3DCreate.array == NULL) oss << "array=NULL";
-      else oss << "array=" << (void*)data->args.hipArray3DCreate.array__val;
-      if (data->args.hipArray3DCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL";
-      else oss << ", pAllocateArray=" << data->args.hipArray3DCreate.pAllocateArray__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipIpcOpenMemHandle:
-      oss << "hipIpcOpenMemHandle(";
-      if (data->args.hipIpcOpenMemHandle.devPtr == NULL) oss << "devPtr=NULL";
-      else oss << "devPtr=" << data->args.hipIpcOpenMemHandle.devPtr__val;
-      oss << ", handle=" << data->args.hipIpcOpenMemHandle.handle;
-      oss << ", flags=" << data->args.hipIpcOpenMemHandle.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetLastError:
-      oss << "hipGetLastError(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetDeviceFlags:
-      oss << "hipGetDeviceFlags(";
-      if (data->args.hipGetDeviceFlags.flags == NULL) oss << "flags=NULL";
-      else oss << "flags=" << data->args.hipGetDeviceFlags.flags__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetSharedMemConfig:
-      oss << "hipDeviceGetSharedMemConfig(";
-      if (data->args.hipDeviceGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL";
-      else oss << "pConfig=" << data->args.hipDeviceGetSharedMemConfig.pConfig__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDrvMemcpy3D:
-      oss << "hipDrvMemcpy3D(";
-      if (data->args.hipDrvMemcpy3D.pCopy == NULL) oss << "pCopy=NULL";
-      else oss << "pCopy=" << data->args.hipDrvMemcpy3D.pCopy__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy2DFromArray:
-      oss << "hipMemcpy2DFromArray(";
-      oss << "dst=" << data->args.hipMemcpy2DFromArray.dst;
-      oss << ", dpitch=" << data->args.hipMemcpy2DFromArray.dpitch;
-      oss << ", src=" << data->args.hipMemcpy2DFromArray.src;
-      oss << ", wOffset=" << data->args.hipMemcpy2DFromArray.wOffset;
-      oss << ", hOffset=" << data->args.hipMemcpy2DFromArray.hOffset;
-      oss << ", width=" << data->args.hipMemcpy2DFromArray.width;
-      oss << ", height=" << data->args.hipMemcpy2DFromArray.height;
-      oss << ", kind=" << data->args.hipMemcpy2DFromArray.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags:
-      oss << "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(";
-      if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL";
-      else oss << "numBlocks=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val;
-      oss << ", f=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f;
-      oss << ", blockSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize;
-      oss << ", dynamicSMemSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize;
-      oss << ", flags=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipSetDeviceFlags:
-      oss << "hipSetDeviceFlags(";
-      oss << "flags=" << data->args.hipSetDeviceFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHccModuleLaunchKernel:
-      oss << "hipHccModuleLaunchKernel(";
-      oss << "f=" << data->args.hipHccModuleLaunchKernel.f;
-      oss << ", globalWorkSizeX=" << data->args.hipHccModuleLaunchKernel.globalWorkSizeX;
-      oss << ", globalWorkSizeY=" << data->args.hipHccModuleLaunchKernel.globalWorkSizeY;
-      oss << ", globalWorkSizeZ=" << data->args.hipHccModuleLaunchKernel.globalWorkSizeZ;
-      oss << ", blockDimX=" << data->args.hipHccModuleLaunchKernel.blockDimX;
-      oss << ", blockDimY=" << data->args.hipHccModuleLaunchKernel.blockDimY;
-      oss << ", blockDimZ=" << data->args.hipHccModuleLaunchKernel.blockDimZ;
-      oss << ", sharedMemBytes=" << data->args.hipHccModuleLaunchKernel.sharedMemBytes;
-      oss << ", hStream=" << data->args.hipHccModuleLaunchKernel.hStream;
-      if (data->args.hipHccModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
-      else oss << ", kernelParams=" << data->args.hipHccModuleLaunchKernel.kernelParams__val;
-      if (data->args.hipHccModuleLaunchKernel.extra == NULL) oss << ", extra=NULL";
-      else oss << ", extra=" << data->args.hipHccModuleLaunchKernel.extra__val;
-      oss << ", startEvent=" << data->args.hipHccModuleLaunchKernel.startEvent;
-      oss << ", stopEvent=" << data->args.hipHccModuleLaunchKernel.stopEvent;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFree:
-      oss << "hipFree(";
-      oss << "ptr=" << data->args.hipFree.ptr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipOccupancyMaxPotentialBlockSize:
-      oss << "hipOccupancyMaxPotentialBlockSize(";
-      if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL";
-      else oss << "gridSize=" << data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val;
-      if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL";
-      else oss << ", blockSize=" << data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val;
-      oss << ", f=" << data->args.hipOccupancyMaxPotentialBlockSize.f;
-      oss << ", dynSharedMemPerBlk=" << data->args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk;
-      oss << ", blockSizeLimit=" << data->args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetAttribute:
-      oss << "hipDeviceGetAttribute(";
-      if (data->args.hipDeviceGetAttribute.pi == NULL) oss << "pi=NULL";
-      else oss << "pi=" << data->args.hipDeviceGetAttribute.pi__val;
-      oss << ", attr=" << data->args.hipDeviceGetAttribute.attr;
-      oss << ", deviceId=" << data->args.hipDeviceGetAttribute.deviceId;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceComputeCapability:
-      oss << "hipDeviceComputeCapability(";
-      if (data->args.hipDeviceComputeCapability.major == NULL) oss << "major=NULL";
-      else oss << "major=" << data->args.hipDeviceComputeCapability.major__val;
-      if (data->args.hipDeviceComputeCapability.minor == NULL) oss << ", minor=NULL";
-      else oss << ", minor=" << data->args.hipDeviceComputeCapability.minor__val;
-      oss << ", device=" << data->args.hipDeviceComputeCapability.device;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxDisablePeerAccess:
-      oss << "hipCtxDisablePeerAccess(";
-      oss << "peerCtx=" << data->args.hipCtxDisablePeerAccess.peerCtx;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMallocManaged:
-      oss << "hipMallocManaged(";
-      if (data->args.hipMallocManaged.dev_ptr == NULL) oss << "dev_ptr=NULL";
-      else oss << "dev_ptr=" << data->args.hipMallocManaged.dev_ptr__val;
-      oss << ", size=" << data->args.hipMallocManaged.size;
-      oss << ", flags=" << data->args.hipMallocManaged.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetByPCIBusId:
-      oss << "hipDeviceGetByPCIBusId(";
-      if (data->args.hipDeviceGetByPCIBusId.device == NULL) oss << "device=NULL";
-      else oss << "device=" << data->args.hipDeviceGetByPCIBusId.device__val;
-      if (data->args.hipDeviceGetByPCIBusId.pciBusId == NULL) oss << ", pciBusId=NULL";
-      else oss << ", pciBusId=" << data->args.hipDeviceGetByPCIBusId.pciBusId__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipIpcGetMemHandle:
-      oss << "hipIpcGetMemHandle(";
-      if (data->args.hipIpcGetMemHandle.handle == NULL) oss << "handle=NULL";
-      else oss << "handle=" << data->args.hipIpcGetMemHandle.handle__val;
-      oss << ", devPtr=" << data->args.hipIpcGetMemHandle.devPtr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyHtoDAsync:
-      oss << "hipMemcpyHtoDAsync(";
-      oss << "dst=" << data->args.hipMemcpyHtoDAsync.dst;
-      oss << ", src=" << data->args.hipMemcpyHtoDAsync.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyHtoDAsync.sizeBytes;
-      oss << ", stream=" << data->args.hipMemcpyHtoDAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxGetDevice:
-      oss << "hipCtxGetDevice(";
-      if (data->args.hipCtxGetDevice.device == NULL) oss << "device=NULL";
-      else oss << "device=" << data->args.hipCtxGetDevice.device__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyDtoD:
-      oss << "hipMemcpyDtoD(";
-      oss << "dst=" << data->args.hipMemcpyDtoD.dst;
-      oss << ", src=" << data->args.hipMemcpyDtoD.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyDtoD.sizeBytes;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleLoadData:
-      oss << "hipModuleLoadData(";
-      if (data->args.hipModuleLoadData.module == NULL) oss << "module=NULL";
-      else oss << "module=" << data->args.hipModuleLoadData.module__val;
-      oss << ", image=" << data->args.hipModuleLoadData.image;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDevicePrimaryCtxRelease:
-      oss << "hipDevicePrimaryCtxRelease(";
-      oss << "dev=" << data->args.hipDevicePrimaryCtxRelease.dev;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor:
-      oss << "hipOccupancyMaxActiveBlocksPerMultiprocessor(";
-      if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL";
-      else oss << "numBlocks=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val;
-      oss << ", f=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f;
-      oss << ", blockSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize;
-      oss << ", dynamicSMemSize=" << data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxSetCurrent:
-      oss << "hipCtxSetCurrent(";
-      oss << "ctx=" << data->args.hipCtxSetCurrent.ctx;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetErrorString:
-      oss << "hipGetErrorString(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamCreate:
-      oss << "hipStreamCreate(";
-      if (data->args.hipStreamCreate.stream == NULL) oss << "stream=NULL";
-      else oss << "stream=" << data->args.hipStreamCreate.stream__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDevicePrimaryCtxRetain:
-      oss << "hipDevicePrimaryCtxRetain(";
-      if (data->args.hipDevicePrimaryCtxRetain.pctx == NULL) oss << "pctx=NULL";
-      else oss << "pctx=" << data->args.hipDevicePrimaryCtxRetain.pctx__val;
-      oss << ", dev=" << data->args.hipDevicePrimaryCtxRetain.dev;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGet:
-      oss << "hipDeviceGet(";
-      if (data->args.hipDeviceGet.device == NULL) oss << "device=NULL";
-      else oss << "device=" << data->args.hipDeviceGet.device__val;
-      oss << ", ordinal=" << data->args.hipDeviceGet.ordinal;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamCreateWithFlags:
-      oss << "hipStreamCreateWithFlags(";
-      if (data->args.hipStreamCreateWithFlags.stream == NULL) oss << "stream=NULL";
-      else oss << "stream=" << data->args.hipStreamCreateWithFlags.stream__val;
-      oss << ", flags=" << data->args.hipStreamCreateWithFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyFromArray:
-      oss << "hipMemcpyFromArray(";
-      oss << "dst=" << data->args.hipMemcpyFromArray.dst;
-      oss << ", srcArray=" << data->args.hipMemcpyFromArray.srcArray;
-      oss << ", wOffset=" << data->args.hipMemcpyFromArray.wOffset;
-      oss << ", hOffset=" << data->args.hipMemcpyFromArray.hOffset;
-      oss << ", count=" << data->args.hipMemcpyFromArray.count;
-      oss << ", kind=" << data->args.hipMemcpyFromArray.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy2DAsync:
-      oss << "hipMemcpy2DAsync(";
-      oss << "dst=" << data->args.hipMemcpy2DAsync.dst;
-      oss << ", dpitch=" << data->args.hipMemcpy2DAsync.dpitch;
-      oss << ", src=" << data->args.hipMemcpy2DAsync.src;
-      oss << ", spitch=" << data->args.hipMemcpy2DAsync.spitch;
-      oss << ", width=" << data->args.hipMemcpy2DAsync.width;
-      oss << ", height=" << data->args.hipMemcpy2DAsync.height;
-      oss << ", kind=" << data->args.hipMemcpy2DAsync.kind;
-      oss << ", stream=" << data->args.hipMemcpy2DAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipFuncGetAttributes:
-      oss << "hipFuncGetAttributes(";
-      if (data->args.hipFuncGetAttributes.attr == NULL) oss << "attr=NULL";
-      else oss << "attr=" << data->args.hipFuncGetAttributes.attr__val;
-      oss << ", func=" << data->args.hipFuncGetAttributes.func;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetSymbolSize:
-      oss << "hipGetSymbolSize(";
-      if (data->args.hipGetSymbolSize.size == NULL) oss << "size=NULL";
-      else oss << "size=" << data->args.hipGetSymbolSize.size__val;
-      oss << ", symbol=" << data->args.hipGetSymbolSize.symbol;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHostFree:
-      oss << "hipHostFree(";
-      oss << "ptr=" << data->args.hipHostFree.ptr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipEventCreateWithFlags:
-      oss << "hipEventCreateWithFlags(";
-      if (data->args.hipEventCreateWithFlags.event == NULL) oss << "event=NULL";
-      else oss << "event=" << data->args.hipEventCreateWithFlags.event__val;
-      oss << ", flags=" << data->args.hipEventCreateWithFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamQuery:
-      oss << "hipStreamQuery(";
-      oss << "stream=" << data->args.hipStreamQuery.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy3D:
-      oss << "hipMemcpy3D(";
-      if (data->args.hipMemcpy3D.p == NULL) oss << "p=NULL";
-      else oss << "p=" << data->args.hipMemcpy3D.p__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyToSymbol:
-      oss << "hipMemcpyToSymbol(";
-      oss << "symbol=" << data->args.hipMemcpyToSymbol.symbol;
-      oss << ", src=" << data->args.hipMemcpyToSymbol.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyToSymbol.sizeBytes;
-      oss << ", offset=" << data->args.hipMemcpyToSymbol.offset;
-      oss << ", kind=" << data->args.hipMemcpyToSymbol.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy:
-      oss << "hipMemcpy(";
-      oss << "dst=" << data->args.hipMemcpy.dst;
-      oss << ", src=" << data->args.hipMemcpy.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpy.sizeBytes;
-      oss << ", kind=" << data->args.hipMemcpy.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipPeekAtLastError:
-      oss << "hipPeekAtLastError(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice:
-      oss << "hipExtLaunchMultiKernelMultiDevice(";
-      if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL";
-      else oss << "launchParamsList=" << data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val;
-      oss << ", numDevices=" << data->args.hipExtLaunchMultiKernelMultiDevice.numDevices;
-      oss << ", flags=" << data->args.hipExtLaunchMultiKernelMultiDevice.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHostAlloc:
-      oss << "hipHostAlloc(";
-      if (data->args.hipHostAlloc.ptr == NULL) oss << "ptr=NULL";
-      else oss << "ptr=" << data->args.hipHostAlloc.ptr__val;
-      oss << ", size=" << data->args.hipHostAlloc.size;
-      oss << ", flags=" << data->args.hipHostAlloc.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipStreamAddCallback:
-      oss << "hipStreamAddCallback(";
-      oss << "stream=" << data->args.hipStreamAddCallback.stream;
-      oss << ", callback=" << data->args.hipStreamAddCallback.callback;
-      oss << ", userData=" << data->args.hipStreamAddCallback.userData;
-      oss << ", flags=" << data->args.hipStreamAddCallback.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyToArray:
-      oss << "hipMemcpyToArray(";
-      if (data->args.hipMemcpyToArray.dst == NULL) oss << "dst=NULL";
-      else oss << "dst=" << data->args.hipMemcpyToArray.dst__val;
-      oss << ", wOffset=" << data->args.hipMemcpyToArray.wOffset;
-      oss << ", hOffset=" << data->args.hipMemcpyToArray.hOffset;
-      oss << ", src=" << data->args.hipMemcpyToArray.src;
-      oss << ", count=" << data->args.hipMemcpyToArray.count;
-      oss << ", kind=" << data->args.hipMemcpyToArray.kind;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemsetD32:
-      oss << "hipMemsetD32(";
-      oss << "dest=" << data->args.hipMemsetD32.dest;
-      oss << ", value=" << data->args.hipMemsetD32.value;
-      oss << ", count=" << data->args.hipMemsetD32.count;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipExtModuleLaunchKernel:
-      oss << "hipExtModuleLaunchKernel(";
-      oss << "f=" << data->args.hipExtModuleLaunchKernel.f;
-      oss << ", globalWorkSizeX=" << data->args.hipExtModuleLaunchKernel.globalWorkSizeX;
-      oss << ", globalWorkSizeY=" << data->args.hipExtModuleLaunchKernel.globalWorkSizeY;
-      oss << ", globalWorkSizeZ=" << data->args.hipExtModuleLaunchKernel.globalWorkSizeZ;
-      oss << ", localWorkSizeX=" << data->args.hipExtModuleLaunchKernel.localWorkSizeX;
-      oss << ", localWorkSizeY=" << data->args.hipExtModuleLaunchKernel.localWorkSizeY;
-      oss << ", localWorkSizeZ=" << data->args.hipExtModuleLaunchKernel.localWorkSizeZ;
-      oss << ", sharedMemBytes=" << data->args.hipExtModuleLaunchKernel.sharedMemBytes;
-      oss << ", hStream=" << data->args.hipExtModuleLaunchKernel.hStream;
-      if (data->args.hipExtModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
-      else oss << ", kernelParams=" << data->args.hipExtModuleLaunchKernel.kernelParams__val;
-      if (data->args.hipExtModuleLaunchKernel.extra == NULL) oss << ", extra=NULL";
-      else oss << ", extra=" << data->args.hipExtModuleLaunchKernel.extra__val;
-      oss << ", startEvent=" << data->args.hipExtModuleLaunchKernel.startEvent;
-      oss << ", stopEvent=" << data->args.hipExtModuleLaunchKernel.stopEvent;
-      oss << ", flags=" << data->args.hipExtModuleLaunchKernel.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceSynchronize:
-      oss << "hipDeviceSynchronize(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetCacheConfig:
-      oss << "hipDeviceGetCacheConfig(";
-      if (data->args.hipDeviceGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL";
-      else oss << "cacheConfig=" << data->args.hipDeviceGetCacheConfig.cacheConfig__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMalloc3D:
-      oss << "hipMalloc3D(";
-      if (data->args.hipMalloc3D.pitchedDevPtr == NULL) oss << "pitchedDevPtr=NULL";
-      else oss << "pitchedDevPtr=" << data->args.hipMalloc3D.pitchedDevPtr__val;
-      oss << ", extent=" << data->args.hipMalloc3D.extent;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipPointerGetAttributes:
-      oss << "hipPointerGetAttributes(";
-      if (data->args.hipPointerGetAttributes.attributes == NULL) oss << "attributes=NULL";
-      else oss << "attributes=" << data->args.hipPointerGetAttributes.attributes__val;
-      oss << ", ptr=" << data->args.hipPointerGetAttributes.ptr;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemsetAsync:
-      oss << "hipMemsetAsync(";
-      oss << "dst=" << data->args.hipMemsetAsync.dst;
-      oss << ", value=" << data->args.hipMemsetAsync.value;
-      oss << ", sizeBytes=" << data->args.hipMemsetAsync.sizeBytes;
-      oss << ", stream=" << data->args.hipMemsetAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetName:
-      oss << "hipDeviceGetName(";
-      if (data->args.hipDeviceGetName.name == NULL) oss << "name=NULL";
-      else oss << "name=" << data->args.hipDeviceGetName.name__val;
-      oss << ", len=" << data->args.hipDeviceGetName.len;
-      oss << ", device=" << data->args.hipDeviceGetName.device;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags:
-      oss << "hipModuleOccupancyMaxPotentialBlockSizeWithFlags(";
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize == NULL) oss << "gridSize=NULL";
-      else oss << "gridSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val;
-      if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize == NULL) oss << ", blockSize=NULL";
-      else oss << ", blockSize=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val;
-      oss << ", f=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f;
-      oss << ", dynSharedMemPerBlk=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk;
-      oss << ", blockSizeLimit=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit;
-      oss << ", flags=" << data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxPushCurrent:
-      oss << "hipCtxPushCurrent(";
-      oss << "ctx=" << data->args.hipCtxPushCurrent.ctx;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyPeer:
-      oss << "hipMemcpyPeer(";
-      oss << "dst=" << data->args.hipMemcpyPeer.dst;
-      oss << ", dstDeviceId=" << data->args.hipMemcpyPeer.dstDeviceId;
-      oss << ", src=" << data->args.hipMemcpyPeer.src;
-      oss << ", srcDeviceId=" << data->args.hipMemcpyPeer.srcDeviceId;
-      oss << ", sizeBytes=" << data->args.hipMemcpyPeer.sizeBytes;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipEventSynchronize:
-      oss << "hipEventSynchronize(";
-      oss << "event=" << data->args.hipEventSynchronize.event;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyDtoDAsync:
-      oss << "hipMemcpyDtoDAsync(";
-      oss << "dst=" << data->args.hipMemcpyDtoDAsync.dst;
-      oss << ", src=" << data->args.hipMemcpyDtoDAsync.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyDtoDAsync.sizeBytes;
-      oss << ", stream=" << data->args.hipMemcpyDtoDAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipProfilerStart:
-      oss << "hipProfilerStart(";
-      oss << ")";
-    break;
-    case HIP_API_ID_hipExtMallocWithFlags:
-      oss << "hipExtMallocWithFlags(";
-      if (data->args.hipExtMallocWithFlags.ptr == NULL) oss << "ptr=NULL";
-      else oss << "ptr=" << data->args.hipExtMallocWithFlags.ptr__val;
-      oss << ", sizeBytes=" << data->args.hipExtMallocWithFlags.sizeBytes;
-      oss << ", flags=" << data->args.hipExtMallocWithFlags.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipCtxEnablePeerAccess:
-      oss << "hipCtxEnablePeerAccess(";
-      oss << "peerCtx=" << data->args.hipCtxEnablePeerAccess.peerCtx;
-      oss << ", flags=" << data->args.hipCtxEnablePeerAccess.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemAllocHost:
-      oss << "hipMemAllocHost(";
-      if (data->args.hipMemAllocHost.ptr == NULL) oss << "ptr=NULL";
-      else oss << "ptr=" << data->args.hipMemAllocHost.ptr__val;
-      oss << ", size=" << data->args.hipMemAllocHost.size;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyDtoHAsync:
-      oss << "hipMemcpyDtoHAsync(";
-      oss << "dst=" << data->args.hipMemcpyDtoHAsync.dst;
-      oss << ", src=" << data->args.hipMemcpyDtoHAsync.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyDtoHAsync.sizeBytes;
-      oss << ", stream=" << data->args.hipMemcpyDtoHAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleLaunchKernel:
-      oss << "hipModuleLaunchKernel(";
-      oss << "f=" << data->args.hipModuleLaunchKernel.f;
-      oss << ", gridDimX=" << data->args.hipModuleLaunchKernel.gridDimX;
-      oss << ", gridDimY=" << data->args.hipModuleLaunchKernel.gridDimY;
-      oss << ", gridDimZ=" << data->args.hipModuleLaunchKernel.gridDimZ;
-      oss << ", blockDimX=" << data->args.hipModuleLaunchKernel.blockDimX;
-      oss << ", blockDimY=" << data->args.hipModuleLaunchKernel.blockDimY;
-      oss << ", blockDimZ=" << data->args.hipModuleLaunchKernel.blockDimZ;
-      oss << ", sharedMemBytes=" << data->args.hipModuleLaunchKernel.sharedMemBytes;
-      oss << ", stream=" << data->args.hipModuleLaunchKernel.stream;
-      if (data->args.hipModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL";
-      else oss << ", kernelParams=" << data->args.hipModuleLaunchKernel.kernelParams__val;
-      if (data->args.hipModuleLaunchKernel.extra == NULL) oss << ", extra=NULL";
-      else oss << ", extra=" << data->args.hipModuleLaunchKernel.extra__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemAllocPitch:
-      oss << "hipMemAllocPitch(";
-      if (data->args.hipMemAllocPitch.dptr == NULL) oss << "dptr=NULL";
-      else oss << "dptr=" << data->args.hipMemAllocPitch.dptr__val;
-      if (data->args.hipMemAllocPitch.pitch == NULL) oss << ", pitch=NULL";
-      else oss << ", pitch=" << data->args.hipMemAllocPitch.pitch__val;
-      oss << ", widthInBytes=" << data->args.hipMemAllocPitch.widthInBytes;
-      oss << ", height=" << data->args.hipMemAllocPitch.height;
-      oss << ", elementSizeBytes=" << data->args.hipMemAllocPitch.elementSizeBytes;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipExtLaunchKernel:
-      oss << "hipExtLaunchKernel(";
-      oss << "function_address=" << data->args.hipExtLaunchKernel.function_address;
-      oss << ", numBlocks=" << data->args.hipExtLaunchKernel.numBlocks;
-      oss << ", dimBlocks=" << data->args.hipExtLaunchKernel.dimBlocks;
-      if (data->args.hipExtLaunchKernel.args == NULL) oss << ", args=NULL";
-      else oss << ", args=" << data->args.hipExtLaunchKernel.args__val;
-      oss << ", sharedMemBytes=" << data->args.hipExtLaunchKernel.sharedMemBytes;
-      oss << ", stream=" << data->args.hipExtLaunchKernel.stream;
-      oss << ", startEvent=" << data->args.hipExtLaunchKernel.startEvent;
-      oss << ", stopEvent=" << data->args.hipExtLaunchKernel.stopEvent;
-      oss << ", flags=" << data->args.hipExtLaunchKernel.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpy2DFromArrayAsync:
-      oss << "hipMemcpy2DFromArrayAsync(";
-      oss << "dst=" << data->args.hipMemcpy2DFromArrayAsync.dst;
-      oss << ", dpitch=" << data->args.hipMemcpy2DFromArrayAsync.dpitch;
-      oss << ", src=" << data->args.hipMemcpy2DFromArrayAsync.src;
-      oss << ", wOffset=" << data->args.hipMemcpy2DFromArrayAsync.wOffset;
-      oss << ", hOffset=" << data->args.hipMemcpy2DFromArrayAsync.hOffset;
-      oss << ", width=" << data->args.hipMemcpy2DFromArrayAsync.width;
-      oss << ", height=" << data->args.hipMemcpy2DFromArrayAsync.height;
-      oss << ", kind=" << data->args.hipMemcpy2DFromArrayAsync.kind;
-      oss << ", stream=" << data->args.hipMemcpy2DFromArrayAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetLimit:
-      oss << "hipDeviceGetLimit(";
-      if (data->args.hipDeviceGetLimit.pValue == NULL) oss << "pValue=NULL";
-      else oss << "pValue=" << data->args.hipDeviceGetLimit.pValue__val;
-      oss << ", limit=" << data->args.hipDeviceGetLimit.limit;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipModuleLoadDataEx:
-      oss << "hipModuleLoadDataEx(";
-      if (data->args.hipModuleLoadDataEx.module == NULL) oss << "module=NULL";
-      else oss << "module=" << data->args.hipModuleLoadDataEx.module__val;
-      oss << ", image=" << data->args.hipModuleLoadDataEx.image;
-      oss << ", numOptions=" << data->args.hipModuleLoadDataEx.numOptions;
-      if (data->args.hipModuleLoadDataEx.options == NULL) oss << ", options=NULL";
-      else oss << ", options=" << data->args.hipModuleLoadDataEx.options__val;
-      if (data->args.hipModuleLoadDataEx.optionsValues == NULL) oss << ", optionsValues=NULL";
-      else oss << ", optionsValues=" << data->args.hipModuleLoadDataEx.optionsValues__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipRuntimeGetVersion:
-      oss << "hipRuntimeGetVersion(";
-      if (data->args.hipRuntimeGetVersion.runtimeVersion == NULL) oss << "runtimeVersion=NULL";
-      else oss << "runtimeVersion=" << data->args.hipRuntimeGetVersion.runtimeVersion__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemRangeGetAttribute:
-      oss << "hipMemRangeGetAttribute(";
-      oss << "data=" << data->args.hipMemRangeGetAttribute.data;
-      oss << ", data_size=" << data->args.hipMemRangeGetAttribute.data_size;
-      oss << ", attribute=" << data->args.hipMemRangeGetAttribute.attribute;
-      oss << ", dev_ptr=" << data->args.hipMemRangeGetAttribute.dev_ptr;
-      oss << ", count=" << data->args.hipMemRangeGetAttribute.count;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceGetP2PAttribute:
-      oss << "hipDeviceGetP2PAttribute(";
-      if (data->args.hipDeviceGetP2PAttribute.value == NULL) oss << "value=NULL";
-      else oss << "value=" << data->args.hipDeviceGetP2PAttribute.value__val;
-      oss << ", attr=" << data->args.hipDeviceGetP2PAttribute.attr;
-      oss << ", srcDevice=" << data->args.hipDeviceGetP2PAttribute.srcDevice;
-      oss << ", dstDevice=" << data->args.hipDeviceGetP2PAttribute.dstDevice;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyPeerAsync:
-      oss << "hipMemcpyPeerAsync(";
-      oss << "dst=" << data->args.hipMemcpyPeerAsync.dst;
-      oss << ", dstDeviceId=" << data->args.hipMemcpyPeerAsync.dstDeviceId;
-      oss << ", src=" << data->args.hipMemcpyPeerAsync.src;
-      oss << ", srcDevice=" << data->args.hipMemcpyPeerAsync.srcDevice;
-      oss << ", sizeBytes=" << data->args.hipMemcpyPeerAsync.sizeBytes;
-      oss << ", stream=" << data->args.hipMemcpyPeerAsync.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetDeviceProperties:
-      oss << "hipGetDeviceProperties(";
-      if (data->args.hipGetDeviceProperties.props == NULL) oss << "props=NULL";
-      else oss << "props=" << data->args.hipGetDeviceProperties.props__val;
-      oss << ", device=" << data->args.hipGetDeviceProperties.device;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyDtoH:
-      oss << "hipMemcpyDtoH(";
-      oss << "dst=" << data->args.hipMemcpyDtoH.dst;
-      oss << ", src=" << data->args.hipMemcpyDtoH.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyDtoH.sizeBytes;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyWithStream:
-      oss << "hipMemcpyWithStream(";
-      oss << "dst=" << data->args.hipMemcpyWithStream.dst;
-      oss << ", src=" << data->args.hipMemcpyWithStream.src;
-      oss << ", sizeBytes=" << data->args.hipMemcpyWithStream.sizeBytes;
-      oss << ", kind=" << data->args.hipMemcpyWithStream.kind;
-      oss << ", stream=" << data->args.hipMemcpyWithStream.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDeviceTotalMem:
-      oss << "hipDeviceTotalMem(";
-      if (data->args.hipDeviceTotalMem.bytes == NULL) oss << "bytes=NULL";
-      else oss << "bytes=" << data->args.hipDeviceTotalMem.bytes__val;
-      oss << ", device=" << data->args.hipDeviceTotalMem.device;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipHostGetDevicePointer:
-      oss << "hipHostGetDevicePointer(";
-      if (data->args.hipHostGetDevicePointer.devPtr == NULL) oss << "devPtr=NULL";
-      else oss << "devPtr=" << data->args.hipHostGetDevicePointer.devPtr__val;
-      oss << ", hstPtr=" << data->args.hipHostGetDevicePointer.hstPtr;
-      oss << ", flags=" << data->args.hipHostGetDevicePointer.flags;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemRangeGetAttributes:
-      oss << "hipMemRangeGetAttributes(";
-      if (data->args.hipMemRangeGetAttributes.data == NULL) oss << "data=NULL";
-      else oss << "data=" << data->args.hipMemRangeGetAttributes.data__val;
-      if (data->args.hipMemRangeGetAttributes.data_sizes == NULL) oss << ", data_sizes=NULL";
-      else oss << ", data_sizes=" << data->args.hipMemRangeGetAttributes.data_sizes__val;
-      if (data->args.hipMemRangeGetAttributes.attributes == NULL) oss << ", attributes=NULL";
-      else oss << ", attributes=" << data->args.hipMemRangeGetAttributes.attributes__val;
-      oss << ", num_attributes=" << data->args.hipMemRangeGetAttributes.num_attributes;
-      oss << ", dev_ptr=" << data->args.hipMemRangeGetAttributes.dev_ptr;
-      oss << ", count=" << data->args.hipMemRangeGetAttributes.count;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemcpyParam2D:
-      oss << "hipMemcpyParam2D(";
-      if (data->args.hipMemcpyParam2D.pCopy == NULL) oss << "pCopy=NULL";
-      else oss << "pCopy=" << data->args.hipMemcpyParam2D.pCopy__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipDevicePrimaryCtxReset:
-      oss << "hipDevicePrimaryCtxReset(";
-      oss << "dev=" << data->args.hipDevicePrimaryCtxReset.dev;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetMipmappedArrayLevel:
-      oss << "hipGetMipmappedArrayLevel(";
-      if (data->args.hipGetMipmappedArrayLevel.levelArray == NULL) oss << "levelArray=NULL";
-      else oss << "levelArray=" << data->args.hipGetMipmappedArrayLevel.levelArray__val;
-      oss << ", mipmappedArray=" << data->args.hipGetMipmappedArrayLevel.mipmappedArray;
-      oss << ", level=" << data->args.hipGetMipmappedArrayLevel.level;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipMemsetD32Async:
-      oss << "hipMemsetD32Async(";
-      oss << "dst=" << data->args.hipMemsetD32Async.dst;
-      oss << ", value=" << data->args.hipMemsetD32Async.value;
-      oss << ", count=" << data->args.hipMemsetD32Async.count;
-      oss << ", stream=" << data->args.hipMemsetD32Async.stream;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetDevice:
-      oss << "hipGetDevice(";
-      if (data->args.hipGetDevice.deviceId == NULL) oss << "deviceId=NULL";
-      else oss << "deviceId=" << data->args.hipGetDevice.deviceId__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipGetDeviceCount:
-      oss << "hipGetDeviceCount(";
-      if (data->args.hipGetDeviceCount.count == NULL) oss << "count=NULL";
-      else oss << "count=" << data->args.hipGetDeviceCount.count__val;
-      oss << ")";
-    break;
-    case HIP_API_ID_hipIpcOpenEventHandle:
-      oss << "hipIpcOpenEventHandle(";
-      if (data->args.hipIpcOpenEventHandle.event == NULL) oss << "event=NULL";
-      else oss << "event=" << data->args.hipIpcOpenEventHandle.event__val;
-      oss << ", handle=" << data->args.hipIpcOpenEventHandle.handle;
-      oss << ")";
-    break;
-    default: oss << "unknown";
-  };
-  return strdup(oss.str().c_str());
-}
-#endif  // HIP_PROF_HIP_API_STRING
-#endif  // _HIP_PROF_STR_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_runtime.h b/third_party/rocm/include/hip/hcc_detail/hip_runtime.h
deleted file mode 100644
index 5411bb3..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_runtime.h
+++ /dev/null
@@ -1,612 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/hip_runtime.h
- *  @brief Contains definitions of APIs for HIP runtime.
- */
-
-//#pragma once
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
-
-#include <hip/hcc_detail/hip_common.h>
-
-//---
-// Top part of file can be compiled with any compiler
-
-//#include <cstring>
-#if __cplusplus
-#include <cmath>
-#include <cstdint>
-#else
-#include <math.h>
-#include <string.h>
-#include <stddef.h>
-#endif  //__cplusplus
-
-// __hip_malloc is not working. Disable it by default.
-#ifndef __HIP_ENABLE_DEVICE_MALLOC__
-#define __HIP_ENABLE_DEVICE_MALLOC__ 0
-#endif
-
-#if __HCC_OR_HIP_CLANG__
-
-#if __HIP__
-#if !defined(__align__)
-#define __align__(x) __attribute__((aligned(x)))
-#endif
-#endif
-
-#define CUDA_SUCCESS hipSuccess
-
-#include <hip/hip_runtime_api.h>
-#endif  // __HCC_OR_HIP_CLANG__
-
-#if __HCC__
-// define HIP_ENABLE_PRINTF to enable printf
-#ifdef HIP_ENABLE_PRINTF
-#define HCC_ENABLE_ACCELERATOR_PRINTF 1
-#endif
-
-//---
-// Remainder of this file only compiles with HCC
-#if defined __HCC__
-#include "grid_launch.h"
-#include "hc_printf.hpp"
-// TODO-HCC-GL - change this to typedef.
-// typedef grid_launch_parm hipLaunchParm ;
-
-#if GENERIC_GRID_LAUNCH == 0
-#define hipLaunchParm grid_launch_parm
-#else
-namespace hip_impl {
-struct Empty_launch_parm {};
-}  // namespace hip_impl
-#define hipLaunchParm hip_impl::Empty_launch_parm
-#endif  // GENERIC_GRID_LAUNCH
-
-#if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1
-#else  // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
-#error(HCC must support GRID_LAUNCH_20)
-#endif  // GRID_LAUNCH_VERSION
-
-#endif  // HCC
-
-#if GENERIC_GRID_LAUNCH == 1 && defined __HCC__
-#include "grid_launch_GGL.hpp"
-#endif  // GENERIC_GRID_LAUNCH
-
-#endif // HCC
-
-#if __HCC_OR_HIP_CLANG__
-extern int HIP_TRACE_API;
-
-#ifdef __cplusplus
-#include <hip/hcc_detail/hip_ldg.h>
-#endif
-#include <hip/hcc_detail/hip_atomic.h>
-#include <hip/hcc_detail/host_defines.h>
-#include <hip/hcc_detail/device_functions.h>
-#include <hip/hcc_detail/surface_functions.h>
-#if __HCC__
-    #include <hip/hcc_detail/math_functions.h>
-    #include <hip/hcc_detail/texture_functions.h>
-#else
-    #include <hip/hcc_detail/texture_fetch_functions.h>
-    #include <hip/hcc_detail/texture_indirect_functions.h>
-#endif
-// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
-#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
-#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
-#endif
-
-// TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call.
-#if defined(__HCC__) && __HIP_DEVICE_COMPILE__ == 1
-#undef assert
-#define assert(COND)                                                                               \
-    {                                                                                              \
-        if (!(COND)) {                                                                             \
-            abort();                                                                               \
-        }                                                                                          \
-    }
-#endif
-
-
-// Feature tests:
-#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
-// Device compile and not host compile:
-
-// 32-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
-#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
-#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
-#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
-#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
-
-// 64-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
-#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
-
-// Doubles
-#define __HIP_ARCH_HAS_DOUBLES__ (1)
-
-// warp cross-lane operations:
-#define __HIP_ARCH_HAS_WARP_VOTE__ (1)
-#define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
-#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
-#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
-
-// sync
-#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1)
-#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
-
-// misc
-#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
-#define __HIP_ARCH_HAS_3DGRID__ (1)
-#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
-
-#endif /* Device feature flags */
-
-
-#define launch_bounds_impl0(requiredMaxThreadsPerBlock)                                            \
-    __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))
-#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)                \
-    __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock),                     \
-                   amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))
-#define select_impl_(_1, _2, impl_, ...) impl_
-#define __launch_bounds__(...)                                                                     \
-    select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
-
-// Detect if we are compiling C++ mode or C mode
-#if defined(__cplusplus)
-#define __HCC_CPP__
-#elif defined(__STDC_VERSION__)
-#define __HCC_C__
-#endif
-
-__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
-
-#if __HIP_ARCH_GFX701__ == 0
-
-__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
-__device__ float __hip_ds_bpermutef(int index, float src);
-__device__ unsigned __hip_ds_permute(int index, unsigned src);
-__device__ float __hip_ds_permutef(int index, float src);
-
-template <int pattern>
-__device__ unsigned __hip_ds_swizzle_N(unsigned int src);
-template <int pattern>
-__device__ float __hip_ds_swizzlef_N(float src);
-
-template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
-__device__ int __hip_move_dpp_N(int src);
-
-#endif  //__HIP_ARCH_GFX803__ == 1
-
-#endif  // __HCC_OR_HIP_CLANG__
-
-#if defined __HCC__
-
-namespace hip_impl {
-  struct GroupId {
-    using R = decltype(hc_get_group_id(0));
-
-    __device__
-    R operator()(std::uint32_t x) const noexcept { return hc_get_group_id(x); }
-  };
-  struct GroupSize {
-    using R = decltype(hc_get_group_size(0));
-
-    __device__
-    R operator()(std::uint32_t x) const noexcept {
-      return hc_get_group_size(x);
-    }
-  };
-  struct NumGroups {
-    using R = decltype(hc_get_num_groups(0));
-
-    __device__
-    R operator()(std::uint32_t x) const noexcept {
-      return hc_get_num_groups(x);
-    }
-  };
-  struct WorkitemId {
-    using R = decltype(hc_get_workitem_id(0));
-
-    __device__
-    R operator()(std::uint32_t x) const noexcept {
-      return hc_get_workitem_id(x);
-    }
-  };
-} // Namespace hip_impl.
-
-template <typename F>
-struct Coordinates {
-  using R = decltype(F{}(0));
-
-  struct X { __device__ operator R() const noexcept { return F{}(0); } };
-  struct Y { __device__ operator R() const noexcept { return F{}(1); } };
-  struct Z { __device__ operator R() const noexcept { return F{}(2); } };
-
-  static constexpr X x{};
-  static constexpr Y y{};
-  static constexpr Z z{};
-};
-
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::X,
-                        Coordinates<hip_impl::GroupSize>::X) noexcept {
-  return hc_get_grid_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::X,
-                        Coordinates<hip_impl::NumGroups>::X) noexcept {
-  return hc_get_grid_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Y,
-                        Coordinates<hip_impl::GroupSize>::Y) noexcept {
-  return hc_get_grid_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Y,
-                        Coordinates<hip_impl::NumGroups>::Y) noexcept {
-  return hc_get_grid_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Z,
-                        Coordinates<hip_impl::GroupSize>::Z) noexcept {
-  return hc_get_grid_size(2);
-}
-inline
-__device__
-std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Z,
-                        Coordinates<hip_impl::NumGroups>::Z) noexcept {
-  return hc_get_grid_size(2);
-}
-
-static constexpr Coordinates<hip_impl::GroupSize> blockDim{};
-static constexpr Coordinates<hip_impl::GroupId> blockIdx{};
-static constexpr Coordinates<hip_impl::NumGroups> gridDim{};
-static constexpr Coordinates<hip_impl::WorkitemId> threadIdx{};
-
-#define hipThreadIdx_x (hc_get_workitem_id(0))
-#define hipThreadIdx_y (hc_get_workitem_id(1))
-#define hipThreadIdx_z (hc_get_workitem_id(2))
-
-#define hipBlockIdx_x (hc_get_group_id(0))
-#define hipBlockIdx_y (hc_get_group_id(1))
-#define hipBlockIdx_z (hc_get_group_id(2))
-
-#define hipBlockDim_x (hc_get_group_size(0))
-#define hipBlockDim_y (hc_get_group_size(1))
-#define hipBlockDim_z (hc_get_group_size(2))
-
-#define hipGridDim_x (hc_get_num_groups(0))
-#define hipGridDim_y (hc_get_num_groups(1))
-#define hipGridDim_z (hc_get_num_groups(2))
-
-#endif // defined __HCC__
-
-#ifndef __OPENMP_AMDGCN__
-#if __HCC_OR_HIP_CLANG__
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#if __HIP_ENABLE_DEVICE_MALLOC__
-extern "C" __device__ void* __hip_malloc(size_t);
-extern "C" __device__ void* __hip_free(void* ptr);
-static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); }
-static inline __device__ void* free(void* ptr) { return __hip_free(ptr); }
-#else
-static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nullptr; }
-static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; }
-#endif
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#endif //__HCC_OR_HIP_CLANG__
-#endif // !__OPENMP_AMDGCN__
-
-#ifdef __HCC__
-
-#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
-
-#define HIP_KERNEL_NAME(...) (__VA_ARGS__)
-#define HIP_SYMBOL(X) #X
-
-#if defined __HCC_CPP__
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block,
-                                       grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block,
-                                       grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block,
-                                       grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block,
-                                       grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
-extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, grid_launch_parm& lp, bool unlockPostponed = 0);
-
-#if GENERIC_GRID_LAUNCH == 0
-//#warning "Original hipLaunchKernel defined"
-// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be
-// either size_t or dim3 types
-#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...)      \
-    do {                                                                                           \
-        grid_launch_parm lp;                                                                       \
-        lp.dynamic_group_mem_bytes = _groupMemBytes;                                               \
-        hipStream_t trueStream =                                                                   \
-            (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName));          \
-        _kernelName(lp, ##__VA_ARGS__);                                                            \
-        ihipPostLaunchKernel(#_kernelName, trueStream, lp);                                        \
-    } while (0)
-#endif  // GENERIC_GRID_LAUNCH
-
-#elif defined(__HCC_C__)
-
-// TODO - develop C interface.
-
-#endif  //__HCC_CPP__
-
-// End doxygen API:
-/**
- *   @}
- */
-
-//
-// hip-clang functions
-//
-#elif defined(__clang__) && defined(__HIP__)
-
-#define HIP_KERNEL_NAME(...) __VA_ARGS__
-#define HIP_SYMBOL(X) X
-
-typedef int hipLaunchParm;
-
-template <std::size_t n, typename... Ts,
-          typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
-void pArgs(const std::tuple<Ts...>&, void*) {}
-
-template <std::size_t n, typename... Ts,
-          typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
-void pArgs(const std::tuple<Ts...>& formals, void** _vargs) {
-    using T = typename std::tuple_element<n, std::tuple<Ts...> >::type;
-
-    static_assert(!std::is_reference<T>{},
-                  "A __global__ function cannot have a reference as one of its "
-                  "arguments.");
-#if defined(HIP_STRICT)
-    static_assert(std::is_trivially_copyable<T>{},
-                  "Only TriviallyCopyable types can be arguments to a __global__ "
-                  "function");
-#endif
-    _vargs[n] = const_cast<void*>(reinterpret_cast<const void*>(&std::get<n>(formals)));
-    return pArgs<n + 1>(formals, _vargs);
-}
-
-template <typename... Formals, typename... Actuals>
-std::tuple<Formals...> validateArgsCountType(void (*kernel)(Formals...), std::tuple<Actuals...>(actuals)) {
-    static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch");
-    std::tuple<Formals...> to_formals{std::move(actuals)};
-    return to_formals;
-}
-
-#if defined(HIP_TEMPLATE_KERNEL_LAUNCH)
-template <typename... Args, typename F = void (*)(Args...)>
-void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-                        std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) {
-    constexpr size_t count = sizeof...(Args);
-    auto tup_ = std::tuple<Args...>{args...};
-    auto tup = validateArgsCountType(kernel, tup_);
-    void* _Args[count];
-    pArgs<0>(tup, _Args);
-
-    auto k = reinterpret_cast<void*>(kernel);
-    hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream);
-}
-#else
-#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...)  \
-    do {                                                                                           \
-        kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__);         \
-    } while (0)
-
-#define hipLaunchKernelGGL(kernelName, ...)  hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
-#endif
-
-#include <hip/hip_runtime_api.h>
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
-struct __HIP_BlockIdx {
-  __device__
-  std::uint32_t operator()(std::uint32_t x) const noexcept { return __ockl_get_group_id(x); }
-};
-struct __HIP_BlockDim {
-  __device__
-  std::uint32_t operator()(std::uint32_t x) const noexcept {
-    return __ockl_get_local_size(x);
-  }
-};
-struct __HIP_GridDim {
-  __device__
-  std::uint32_t operator()(std::uint32_t x) const noexcept {
-    return __ockl_get_num_groups(x);
-  }
-};
-struct __HIP_ThreadIdx {
-  __device__
-  std::uint32_t operator()(std::uint32_t x) const noexcept {
-    return __ockl_get_local_id(x);
-  }
-};
-
-template <typename F>
-struct __HIP_Coordinates {
-  using R = decltype(F{}(0));
-
-  struct X { __device__ operator R() const noexcept { return F{}(0); } };
-  struct Y { __device__ operator R() const noexcept { return F{}(1); } };
-  struct Z { __device__ operator R() const noexcept { return F{}(2); } };
-
-  static constexpr X x{};
-  static constexpr Y y{};
-  static constexpr Z z{};
-#ifdef __cplusplus
-  __device__ operator dim3() const { return dim3(x, y, z); }
-#endif
-
-};
-template <typename F>
-#if !defined(_MSC_VER)
-__attribute__((weak))
-#endif
-constexpr typename __HIP_Coordinates<F>::X __HIP_Coordinates<F>::x;
-template <typename F>
-#if !defined(_MSC_VER)
-__attribute__((weak))
-#endif
-constexpr typename __HIP_Coordinates<F>::Y __HIP_Coordinates<F>::y;
-template <typename F>
-#if !defined(_MSC_VER)
-__attribute__((weak))
-#endif
-constexpr typename __HIP_Coordinates<F>::Z __HIP_Coordinates<F>::z;
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(uint);
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::X,
-                        __HIP_Coordinates<__HIP_BlockDim>::X) noexcept {
-  return __ockl_get_global_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::X,
-                        __HIP_Coordinates<__HIP_GridDim>::X) noexcept {
-  return __ockl_get_global_size(0);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::Y,
-                        __HIP_Coordinates<__HIP_BlockDim>::Y) noexcept {
-  return __ockl_get_global_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::Y,
-                        __HIP_Coordinates<__HIP_GridDim>::Y) noexcept {
-  return __ockl_get_global_size(1);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::Z,
-                        __HIP_Coordinates<__HIP_BlockDim>::Z) noexcept {
-  return __ockl_get_global_size(2);
-}
-inline
-__device__
-std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::Z,
-                        __HIP_Coordinates<__HIP_GridDim>::Z) noexcept {
-  return __ockl_get_global_size(2);
-}
-
-static constexpr __HIP_Coordinates<__HIP_BlockDim> blockDim{};
-static constexpr __HIP_Coordinates<__HIP_BlockIdx> blockIdx{};
-static constexpr __HIP_Coordinates<__HIP_GridDim> gridDim{};
-static constexpr __HIP_Coordinates<__HIP_ThreadIdx> threadIdx{};
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
-#define hipThreadIdx_x (__ockl_get_local_id(0))
-#define hipThreadIdx_y (__ockl_get_local_id(1))
-#define hipThreadIdx_z (__ockl_get_local_id(2))
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
-#define hipBlockIdx_x (__ockl_get_group_id(0))
-#define hipBlockIdx_y (__ockl_get_group_id(1))
-#define hipBlockIdx_z (__ockl_get_group_id(2))
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
-#define hipBlockDim_x (__ockl_get_local_size(0))
-#define hipBlockDim_y (__ockl_get_local_size(1))
-#define hipBlockDim_z (__ockl_get_local_size(2))
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
-#define hipGridDim_x (__ockl_get_num_groups(0))
-#define hipGridDim_y (__ockl_get_num_groups(1))
-#define hipGridDim_z (__ockl_get_num_groups(2))
-
-#include <hip/hcc_detail/math_functions.h>
-
-#if __HIP_HCC_COMPAT_MODE__
-// Define HCC work item functions in terms of HIP builtin variables.
-#pragma push_macro("__DEFINE_HCC_FUNC")
-#define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
-inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \
-  if (i==0) \
-    return hip_var.x; \
-  else if(i==1) \
-    return hip_var.y; \
-  else \
-    return hip_var.z; \
-}
-
-__DEFINE_HCC_FUNC(workitem_id, threadIdx)
-__DEFINE_HCC_FUNC(group_id, blockIdx)
-__DEFINE_HCC_FUNC(group_size, blockDim)
-__DEFINE_HCC_FUNC(num_groups, gridDim)
-#pragma pop_macro("__DEFINE_HCC_FUNC")
-
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint);
-inline __device__ __attribute__((always_inline)) uint
-hc_get_workitem_absolute_id(int dim)
-{
-  return (uint)__ockl_get_global_id(dim);
-}
-
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-// Support std::complex.
-#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
-#pragma push_macro("__CUDA__")
-#define __CUDA__
-#include <__clang_cuda_math_forward_declares.h>
-#include <__clang_cuda_complex_builtins.h>
-// Workaround for using libc++ with HIP-Clang.
-// The following headers requires clang include path before standard C++ include path.
-// However libc++ include path requires to be before clang include path.
-// To workaround this, we pass -isystem with the parent directory of clang include
-// path instead of the clang include path itself.
-#include <include/cuda_wrappers/algorithm>
-#include <include/cuda_wrappers/complex>
-#include <include/cuda_wrappers/new>
-#undef __CUDA__
-#pragma pop_macro("__CUDA__")
-#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#endif // defined(__clang__) && defined(__HIP__)
-
-#include <hip/hcc_detail/hip_memory.h>
-
-#endif  // HIP_HCC_DETAIL_RUNTIME_H
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_runtime_api.h b/third_party/rocm/include/hip/hcc_detail/hip_runtime_api.h
deleted file mode 100644
index 1980004..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_runtime_api.h
+++ /dev/null
@@ -1,4358 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-//#pragma once
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H
-/**
- *  @file  hcc_detail/hip_runtime_api.h
- *  @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or
- * special language extensions (-hc mode) ; those functions in hip_runtime.h.
- */
-#include <stdint.h>
-#include <stddef.h>
-
-#ifndef GENERIC_GRID_LAUNCH
-#define GENERIC_GRID_LAUNCH 1
-#endif
-
-#ifndef __HIP_ROCclr__
-#define __HIP_ROCclr__ 0
-#endif
-
-#include <hip/hcc_detail/host_defines.h>
-#include <hip/hcc_detail/driver_types.h>
-#include <hip/hcc_detail/hip_texture_types.h>
-#include <hip/hcc_detail/hip_surface_types.h>
-
-#if !__HIP_ROCclr__ && defined(__cplusplus)
-#include <hsa/hsa.h>
-#include <hip/hcc_detail/program_state.hpp>
-#endif
-
-#if defined(_MSC_VER)
-#define DEPRECATED(msg) __declspec(deprecated(msg))
-#else // !defined(_MSC_VER)
-#define DEPRECATED(msg) __attribute__ ((deprecated(msg)))
-#endif // !defined(_MSC_VER)
-
-#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases. For more details please refer https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_deprecated_api_list.md"
-
-#if defined(__HCC__) && (__hcc_workweek__ < 16155)
-#error("This version of HIP requires a newer version of HCC.");
-#endif
-
-#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
-#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
-#define HIP_LAUNCH_PARAM_END ((void*)0x03)
-
-#ifdef __cplusplus
-  #define __dparm(x) \
-          = x
-#else
-  #define __dparm(x)
-#endif
-
-#ifdef __GNUC__
-#pragma GCC visibility push (default)
-#endif
-
-#ifdef __cplusplus
-
-namespace hip_impl {
-hipError_t hip_init();
-}  // namespace hip_impl
-#endif
-
-// Structure definitions:
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//---
-// API-visible structures
-typedef struct ihipCtx_t* hipCtx_t;
-
-// Note many APIs also use integer deviceIds as an alternative to the device pointer:
-typedef int hipDevice_t;
-
-typedef enum hipDeviceP2PAttr {
-  hipDevP2PAttrPerformanceRank = 0,
-  hipDevP2PAttrAccessSupported,
-  hipDevP2PAttrNativeAtomicSupported,
-  hipDevP2PAttrHipArrayAccessSupported
-} hipDeviceP2PAttr;
-
-typedef struct ihipStream_t* hipStream_t;
-
-#define hipIpcMemLazyEnablePeerAccess 0
-
-#define HIP_IPC_HANDLE_SIZE 64
-
-typedef struct hipIpcMemHandle_st {
-    char reserved[HIP_IPC_HANDLE_SIZE];
-} hipIpcMemHandle_t;
-
-#if __HIP_ROCclr__
-// TODO: IPC event handle currently unsupported
-struct ihipIpcEventHandle_t;
-typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t;
-#else
-typedef struct hipIpcEventHandle_st {
-    char reserved[HIP_IPC_HANDLE_SIZE];
-} hipIpcEventHandle_t;
-#endif
-typedef struct ihipModule_t* hipModule_t;
-
-typedef struct ihipModuleSymbol_t* hipFunction_t;
-
-typedef struct hipFuncAttributes {
-    int binaryVersion;
-    int cacheModeCA;
-    size_t constSizeBytes;
-    size_t localSizeBytes;
-    int maxDynamicSharedSizeBytes;
-    int maxThreadsPerBlock;
-    int numRegs;
-    int preferredShmemCarveout;
-    int ptxVersion;
-    size_t sharedSizeBytes;
-} hipFuncAttributes;
-
-typedef struct ihipEvent_t* hipEvent_t;
-
-enum hipLimit_t {
-    hipLimitMallocHeapSize = 0x02,
-};
-
-/**
- * @addtogroup GlobalDefs More
- * @{
- */
-//! Flags that can be used with hipStreamCreateWithFlags
-#define hipStreamDefault                                                                           \
-    0x00  ///< Default stream creation flags. These are used with hipStreamCreate().
-#define hipStreamNonBlocking 0x01  ///< Stream does not implicitly synchronize with null stream
-
-
-//! Flags that can be used with hipEventCreateWithFlags:
-#define hipEventDefault 0x0  ///< Default flags
-#define hipEventBlockingSync                                                                       \
-    0x1  ///< Waiting will yield CPU.  Power-friendly and usage-friendly but may increase latency.
-#define hipEventDisableTiming                                                                      \
-    0x2  ///< Disable event's capability to record timing information.  May improve performance.
-#define hipEventInterprocess 0x4  ///< Event can support IPC.  @warning - not supported in HIP.
-#define hipEventReleaseToDevice                                                                    \
-    0x40000000  /// < Use a device-scope release when recording this event.  This flag is useful to
-                /// obtain more precise timings of commands between events.  The flag is a no-op on
-                /// CUDA platforms.
-#define hipEventReleaseToSystem                                                                    \
-    0x80000000  /// < Use a system-scope release that when recording this event.  This flag is
-                /// useful to make non-coherent host memory visible to the host.  The flag is a
-                /// no-op on CUDA platforms.
-
-
-//! Flags that can be used with hipHostMalloc
-#define hipHostMallocDefault 0x0
-#define hipHostMallocPortable 0x1  ///< Memory is considered allocated by all contexts.
-#define hipHostMallocMapped                                                                        \
-    0x2  ///< Map the allocation into the address space for the current device.  The device pointer
-         ///< can be obtained with #hipHostGetDevicePointer.
-#define hipHostMallocWriteCombined 0x4
-#define hipHostMallocNumaUser                                                                      \
-    0x20000000  ///< Host memory allocation will follow numa policy set by user
-
-#define hipHostMallocCoherent                                                                      \
-    0x40000000  ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific
-                ///< allocation.
-#define hipHostMallocNonCoherent                                                                   \
-    0x80000000  ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific
-                ///< allocation.
-
-#define hipMemAttachGlobal  0x01    ///< Memory can be accessed by any stream on any device
-#define hipMemAttachHost    0x02    ///< Memory cannot be accessed by any stream on any device
-#define hipMemAttachSingle  0x04    ///< Memory can only be accessed by a single stream on
-                                    ///< the associated device
-
-#define hipDeviceMallocDefault 0x0
-#define hipDeviceMallocFinegrained 0x1  ///< Memory is allocated in fine grained region of device.
-
-//! Flags that can be used with hipHostRegister
-#define hipHostRegisterDefault 0x0   ///< Memory is Mapped and Portable
-#define hipHostRegisterPortable 0x1  ///< Memory is considered registered by all contexts.
-#define hipHostRegisterMapped                                                                      \
-    0x2  ///< Map the allocation into the address space for the current device.  The device pointer
-         ///< can be obtained with #hipHostGetDevicePointer.
-#define hipHostRegisterIoMemory 0x4  ///< Not supported.
-#define hipExtHostRegisterCoarseGrained 0x8  ///< Coarse Grained host memory lock
-
-#define hipDeviceScheduleAuto 0x0  ///< Automatically select between Spin and Yield
-#define hipDeviceScheduleSpin                                                                      \
-    0x1  ///< Dedicate a CPU core to spin-wait.  Provides lowest latency, but burns a CPU core and
-         ///< may consume more power.
-#define hipDeviceScheduleYield                                                                     \
-    0x2  ///< Yield the CPU to the operating system when waiting.  May increase latency, but lowers
-         ///< power and is friendlier to other threads in the system.
-#define hipDeviceScheduleBlockingSync 0x4
-#define hipDeviceScheduleMask 0x7
-
-#define hipDeviceMapHost 0x8
-#define hipDeviceLmemResizeToMax 0x16
-
-#define hipArrayDefault 0x00  ///< Default HIP array allocation flag
-#define hipArrayLayered 0x01
-#define hipArraySurfaceLoadStore 0x02
-#define hipArrayCubemap 0x04
-#define hipArrayTextureGather 0x08
-
-#define hipOccupancyDefault 0x00
-
-#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01
-#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02
-
-#define hipCpuDeviceId ((int)-1)
-#define hipInvalidDeviceId ((int)-2)
-
-// Flags that can be used with hipExtLaunch Set of APIs
-#define hipExtAnyOrderLaunch 0x01  ///< AnyOrderLaunch of kernels
-
-/*
- * @brief HIP Memory Advise values
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipMemoryAdvise {
-    hipMemAdviseSetReadMostly = 1,          ///< Data will mostly be read and only occassionally
-                                            ///< be written to
-    hipMemAdviseUnsetReadMostly = 2,        ///< Undo the effect of hipMemAdviseSetReadMostly
-    hipMemAdviseSetPreferredLocation = 3,   ///< Set the preferred location for the data as
-                                            ///< the specified device
-    hipMemAdviseUnsetPreferredLocation = 4, ///< Clear the preferred location for the data
-    hipMemAdviseSetAccessedBy = 5,          ///< Data will be accessed by the specified device,
-                                            ///< so prevent page faults as much as possible
-    hipMemAdviseUnsetAccessedBy = 6         ///< Let the Unified Memory subsystem decide on
-                                            ///< the page faulting policy for the specified device
-} hipMemoryAdvise;
-
-/*
- * @brief HIP range attributes
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipMemRangeAttribute {
-    hipMemRangeAttributeReadMostly = 1,         ///< Whether the range will mostly be read and
-                                                ///< only occassionally be written to
-    hipMemRangeAttributePreferredLocation = 2,  ///< The preferred location of the range
-    hipMemRangeAttributeAccessedBy = 3,         ///< Memory range has cudaMemAdviseSetAccessedBy
-                                                ///< set for specified device
-    hipMemRangeAttributeLastPrefetchLocation = 4,///< The last location to which the range was prefetched
-} hipMemRangeAttribute;
-
-/*
- * @brief hipJitOption
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipJitOption {
-    hipJitOptionMaxRegisters = 0,
-    hipJitOptionThreadsPerBlock,
-    hipJitOptionWallTime,
-    hipJitOptionInfoLogBuffer,
-    hipJitOptionInfoLogBufferSizeBytes,
-    hipJitOptionErrorLogBuffer,
-    hipJitOptionErrorLogBufferSizeBytes,
-    hipJitOptionOptimizationLevel,
-    hipJitOptionTargetFromContext,
-    hipJitOptionTarget,
-    hipJitOptionFallbackStrategy,
-    hipJitOptionGenerateDebugInfo,
-    hipJitOptionLogVerbose,
-    hipJitOptionGenerateLineInfo,
-    hipJitOptionCacheMode,
-    hipJitOptionSm3xOpt,
-    hipJitOptionFastCompile,
-    hipJitOptionNumOptions
-} hipJitOption;
-
-/**
- * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
- */
-typedef enum hipFuncAttribute {
-    hipFuncAttributeMaxDynamicSharedMemorySize = 8,
-    hipFuncAttributePreferredSharedMemoryCarveout = 9,
-    hipFuncAttributeMax
-} hipFuncAttribute;
-
-/**
- * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
- */
-typedef enum hipFuncCache_t {
-    hipFuncCachePreferNone,    ///< no preference for shared memory or L1 (default)
-    hipFuncCachePreferShared,  ///< prefer larger shared memory and smaller L1 cache
-    hipFuncCachePreferL1,      ///< prefer larger L1 cache and smaller shared memory
-    hipFuncCachePreferEqual,   ///< prefer equal size L1 cache and shared memory
-} hipFuncCache_t;
-
-/**
- * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
- */
-typedef enum hipSharedMemConfig {
-    hipSharedMemBankSizeDefault,  ///< The compiler selects a device-specific value for the banking.
-    hipSharedMemBankSizeFourByte,  ///< Shared mem is banked at 4-bytes intervals and performs best
-                                   ///< when adjacent threads access data 4 bytes apart.
-    hipSharedMemBankSizeEightByte  ///< Shared mem is banked at 8-byte intervals and performs best
-                                   ///< when adjacent threads access data 4 bytes apart.
-} hipSharedMemConfig;
-
-/**
- * Struct for data in 3D
- *
- */
-typedef struct dim3 {
-    uint32_t x;  ///< x
-    uint32_t y;  ///< y
-    uint32_t z;  ///< z
-#ifdef __cplusplus
-    __host__ __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
-#endif
-} dim3;
-
-typedef struct hipLaunchParams_t {
-    void* func;             ///< Device function symbol
-    dim3 gridDim;           ///< Grid dimentions
-    dim3 blockDim;          ///< Block dimentions
-    void **args;            ///< Arguments
-    size_t sharedMem;       ///< Shared memory
-    hipStream_t stream;     ///< Stream identifier
-} hipLaunchParams;
-
-#if __HIP_HAS_GET_PCH
-/**
- * Internal use only. This API may change in the future
- * Pre-Compiled header for online compilation
- *
- */
-    void __hipGetPCH(const char** pch, unsigned int*size);
-#endif
-
-
-// Doxygen end group GlobalDefs
-/**  @} */
-
-
-//-------------------------------------------------------------------------------------------------
-
-
-// The handle allows the async commands to use the stream even if the parent hipStream_t goes
-// out-of-scope.
-// typedef class ihipStream_t * hipStream_t;
-
-
-/*
- * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the
- * surrounding hipEvent_t goes out-of-scope. This is handy for cases where the hipEvent_t goes
- * out-of-scope but the true event is being written by some async queue or device */
-// typedef struct hipEvent_t {
-//    struct ihipEvent_t *_handle;
-//} hipEvent_t;
-
-
-/**
- *  @defgroup API HIP API
- *  @{
- *
- *  Defines the HIP API.  See the individual sections for more information.
- */
-
-
-/**
- *  @defgroup Driver Initialization and Version
- *  @{
- *  This section describes the initializtion and version functions of HIP runtime API.
- *
- */
-
-/**
- * @brief Explicitly initializes the HIP runtime.
- *
- * Most HIP APIs implicitly initialize the HIP runtime.
- * This API provides control over the timing of the initialization.
- */
-// TODO-ctx - more description on error codes.
-hipError_t hipInit(unsigned int flags);
-
-/**
- * @brief Returns the approximate HIP driver version.
- *
- * @param [out] driverVersion
- *
- * @returns #hipSuccess, #hipErrorInavlidValue
- *
- * @warning The HIP feature set does not correspond to an exact CUDA SDK driver revision.
- * This function always set *driverVersion to 4 as an approximation though HIP supports
- * some features which were introduced in later CUDA SDK revisions.
- * HIP apps code should not rely on the driver revision number here and should
- * use arch feature flags to test device capabilities or conditional compilation.
- *
- * @see hipRuntimeGetVersion
- */
-hipError_t hipDriverGetVersion(int* driverVersion);
-
-/**
- * @brief Returns the approximate HIP Runtime version.
- *
- * @param [out] runtimeVersion
- *
- * @returns #hipSuccess, #hipErrorInavlidValue
- *
- * @warning On HIP/HCC path this function returns HIP runtime patch version however on
- * HIP/NVCC path this function return CUDA runtime version.
- *
- * @see hipDriverGetVersion
- */
-hipError_t hipRuntimeGetVersion(int* runtimeVersion);
-
-
-/**
- * @brief Returns a handle to a compute device
- * @param [out] device
- * @param [in] ordinal
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGet(hipDevice_t* device, int ordinal);
-
-/**
- * @brief Returns the compute capability of the device
- * @param [out] major
- * @param [out] minor
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device);
-
-/**
- * @brief Returns an identifer string for the device.
- * @param [out] name
- * @param [in] len
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device);
-
-
-/**
- * @brief Returns a value for attr of link between two devices
- * @param [out] value
- * @param [in] attr
- * @param [in] srcDevice
- * @param [in] dstDevice
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
-                                    int srcDevice, int dstDevice);
-
-/**
- * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID.
- * @param [out] pciBusId
- * @param [in] len
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device);
-
-
-/**
- * @brief Returns a handle to a compute device.
- * @param [out] device handle
- * @param [in] PCI Bus ID
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice, #hipErrorInvalidValue
- */
-hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId);
-
-
-/**
- * @brief Returns the total amount of memory on the device.
- * @param [out] bytes
- * @param [in] device
- *
- * @returns #hipSuccess, #hipErrorInavlidDevice
- */
-hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device);
-
-
-// doxygen end initialization
-/**
- * @}
- */
-
-/**
- *  @defgroup Device Device Management
- *  @{
- *  This section describes the device management functions of HIP runtime API.
- */
-
-/**
- * @brief Waits on all active streams on current device
- *
- * When this command is invoked, the host thread gets blocked until all the commands associated
- * with streams associated with the device. HIP does not support multiple blocking modes (yet!).
- *
- * @returns #hipSuccess
- *
- * @see hipSetDevice, hipDeviceReset
- */
-hipError_t hipDeviceSynchronize(void);
-
-
-/**
- * @brief The state of current device is discarded and updated to a fresh state.
- *
- * Calling this function deletes all streams created, memory allocated, kernels running, events
- * created. Make sure that no other thread is using the device or streams, memory, kernels, events
- * associated with the current device.
- *
- * @returns #hipSuccess
- *
- * @see hipDeviceSynchronize
- */
-hipError_t hipDeviceReset(void);
-
-
-/**
- * @brief Set default device to be used for subsequent hip API calls from this thread.
- *
- * @param[in] deviceId Valid device in range 0...hipGetDeviceCount().
- *
- * Sets @p device as the default device for the calling host thread.  Valid device id's are 0...
- * (hipGetDeviceCount()-1).
- *
- * Many HIP APIs implicitly use the "default device" :
- *
- * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be
- * allocated on device.
- * - Any streams or events created from this host thread will be associated with device.
- * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device
- * (unless a specific stream is specified, in which case the device associated with that stream will
- * be used).
- *
- * This function may be called from any host thread.  Multiple host threads may use the same device.
- * This function does no synchronization with the previous or new device, and has very little
- * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before
- * making a HIP runtime call which uses the default device.
- *
- * The default device is stored in thread-local-storage for each thread.
- * Thread-pool implementations may inherit the default device of the previous thread.  A good
- * practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known
- * standard device.
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorDeviceAlreadyInUse
- *
- * @see hipGetDevice, hipGetDeviceCount
- */
-hipError_t hipSetDevice(int deviceId);
-
-
-/**
- * @brief Return the default device id for the calling host thread.
- *
- * @param [out] device *device is written with the default device
- *
- * HIP maintains an default device for each thread using thread-local-storage.
- * This device is used implicitly for HIP runtime APIs called by this thread.
- * hipGetDevice returns in * @p device the default device for the calling host thread.
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- *
- * @see hipSetDevice, hipGetDevicesizeBytes
- */
-hipError_t hipGetDevice(int* deviceId);
-
-
-/**
- * @brief Return number of compute-capable devices.
- *
- * @param [output] count Returns number of compute-capable devices.
- *
- * @returns #hipSuccess, #hipErrorNoDevice
- *
- *
- * Returns in @p *count the number of devices that have ability to run compute commands.  If there
- * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. If 1 or more
- * devices can be found, then hipGetDeviceCount returns #hipSuccess.
- */
-hipError_t hipGetDeviceCount(int* count);
-
-/**
- * @brief Query for a specific device attribute.
- *
- * @param [out] pi pointer to value to return
- * @param [in] attr attribute to query
- * @param [in] deviceId which device to query for information
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- */
-hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId);
-
-/**
- * @brief Returns device properties.
- *
- * @param [out] prop written with device properties
- * @param [in]  deviceId which device to query for information
- *
- * @return #hipSuccess, #hipErrorInvalidDevice
- * @bug HCC always returns 0 for maxThreadsPerMultiProcessor
- * @bug HCC always returns 0 for regsPerBlock
- * @bug HCC always returns 0 for l2CacheSize
- *
- * Populates hipGetDeviceProperties with information for the specified device.
- */
-hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
-
-
-/**
- * @brief Set L1/Shared cache partition.
- *
- * @param [in] cacheConfig
- *
- * @returns #hipSuccess, #hipErrorNotInitialized
- * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is ignored
- * on those architectures.
- *
- */
-hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig);
-
-
-/**
- * @brief Set Cache configuration for a specific function
- *
- * @param [in] cacheConfig
- *
- * @returns #hipSuccess, #hipErrorNotInitialized
- * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is ignored
- * on those architectures.
- *
- */
-hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig);
-
-/**
- * @brief Get Resource limits of current device
- *
- * @param [out] pValue
- * @param [in]  limit
- *
- * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue
- * Note: Currently, only hipLimitMallocHeapSize is available
- *
- */
-hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit);
-
-
-/**
- * @brief Returns bank width of shared memory for current device
- *
- * @param [out] pConfig
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig);
-
-/**
- * @brief Gets the flags set for current device
- *
- * @param [out] flags
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- */
-hipError_t hipGetDeviceFlags(unsigned int* flags);
-
-/**
- * @brief The bank width of shared memory on current device is set
- *
- * @param [in] config
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config);
-
-/**
- * @brief The current device behavior is changed according the flags passed.
- *
- * @param [in] flags
- *
- * The schedule flags impact how HIP waits for the completion of a command running on a device.
- * hipDeviceScheduleSpin         : HIP runtime will actively spin in the thread which submitted the
- * work until the command completes.  This offers the lowest latency, but will consume a CPU core
- * and may increase power. hipDeviceScheduleYield        : The HIP runtime will yield the CPU to
- * system so that other tasks can use it.  This may increase latency to detect the completion but
- * will consume less power and is friendlier to other tasks in the system.
- * hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield.
- * hipDeviceScheduleAuto         : Use a hueristic to select between Spin and Yield modes.  If the
- * number of HIP contexts is greater than the number of logical processors in the system, use Spin
- * scheduling.  Else use Yield scheduling.
- *
- *
- * hipDeviceMapHost              : Allow mapping host memory.  On ROCM, this is always allowed and
- * the flag is ignored. hipDeviceLmemResizeToMax      : @warning ROCm silently ignores this flag.
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess
- *
- *
- */
-hipError_t hipSetDeviceFlags(unsigned flags);
-
-/**
- * @brief Device which matches hipDeviceProp_t is returned
- *
- * @param [out] device ID
- * @param [in]  device properties pointer
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop);
-
-/**
- * @brief Returns the link type and hop count between two devices
- *
- * @param [in] device1 Ordinal for device1
- * @param [in] device2 Ordinal for device2
- * @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two devices
- * @param [out] hopcount Returns the hop count between the two devices
- *
- * Queries and returns the HSA link type and the hop count between the two specified devices.
- *
- * @returns #hipSuccess, #hipInvalidDevice, #hipErrorRuntimeOther
- */
-hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount);
-
-
-// TODO: implement IPC apis
-
-/**
- * @brief Gets an interprocess memory handle for an existing device memory
- *          allocation
- *
- * Takes a pointer to the base of an existing device memory allocation created
- * with hipMalloc and exports it for use in another process. This is a
- * lightweight operation and may be called multiple times on an allocation
- * without adverse effects.
- *
- * If a region of memory is freed with hipFree and a subsequent call
- * to hipMalloc returns memory with the same device address,
- * hipIpcGetMemHandle will return a unique handle for the
- * new memory.
- *
- * @param handle - Pointer to user allocated hipIpcMemHandle to return
- *                    the handle in.
- * @param devPtr - Base pointer to previously allocated device memory
- *
- * @returns
- * hipSuccess,
- * hipErrorInvalidHandle,
- * hipErrorOutOfMemory,
- * hipErrorMapFailed,
- *
- */
-hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr);
-
-/**
- * @brief Opens an interprocess memory handle exported from another process
- *          and returns a device pointer usable in the local process.
- *
- * Maps memory exported from another process with hipIpcGetMemHandle into
- * the current device address space. For contexts on different devices
- * hipIpcOpenMemHandle can attempt to enable peer access between the
- * devices as if the user called hipDeviceEnablePeerAccess. This behavior is
- * controlled by the hipIpcMemLazyEnablePeerAccess flag.
- * hipDeviceCanAccessPeer can determine if a mapping is possible.
- *
- * Contexts that may open hipIpcMemHandles are restricted in the following way.
- * hipIpcMemHandles from each device in a given process may only be opened
- * by one context per device per other process.
- *
- * Memory returned from hipIpcOpenMemHandle must be freed with
- * hipIpcCloseMemHandle.
- *
- * Calling hipFree on an exported memory region before calling
- * hipIpcCloseMemHandle in the importing context will result in undefined
- * behavior.
- *
- * @param devPtr - Returned device pointer
- * @param handle - hipIpcMemHandle to open
- * @param flags  - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess
- *
- * @returns
- * hipSuccess,
- * hipErrorMapFailed,
- * hipErrorInvalidHandle,
- * hipErrorTooManyPeers
- *
- * @note No guarantees are made about the address returned in @p *devPtr.
- * In particular, multiple processes may not receive the same address for the same @p handle.
- *
- */
-hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags);
-
-/**
- * @brief Close memory mapped with hipIpcOpenMemHandle
- *
- * Unmaps memory returnd by hipIpcOpenMemHandle. The original allocation
- * in the exporting process as well as imported mappings in other processes
- * will be unaffected.
- *
- * Any resources used to enable peer access will be freed if this is the
- * last mapping using them.
- *
- * @param devPtr - Device pointer returned by hipIpcOpenMemHandle
- *
- * @returns
- * hipSuccess,
- * hipErrorMapFailed,
- * hipErrorInvalidHandle,
- *
- */
-hipError_t hipIpcCloseMemHandle(void* devPtr);
-
-
-hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event);
-hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle);
-
-// end doxygen Device
-/**
- * @}
- */
-
-/**
- *
- *  @defgroup Execution Execution Control
- *  @{
- *  This section describes the execution control functions of HIP runtime API.
- *
- */
-/**
- * @brief Set attribute for a specific function
- *
- * @param [in] func;
- * @param [in] attr;
- * @param [in] value;
- *
- * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value);
-
-/**
- * @brief Set Cache configuration for a specific function
- *
- * @param [in] config;
- *
- * @returns #hipSuccess, #hipErrorNotInitialized
- * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is ignored
- * on those architectures.
- *
- */
-hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config);
-
-/**
- * @brief Set shared memory configuation for a specific function
- *
- * @param [in] func
- * @param [in] config
- *
- * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue
- *
- * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- */
-hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config);
-
-//doxygen end execution
-/**
- * @}
- */
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup Error Error Handling
- *  @{
- *  This section describes the error handling functions of HIP runtime API.
- */
-
-/**
- * @brief Return last error returned by any HIP runtime API call and resets the stored error code to
- * #hipSuccess
- *
- * @returns return code from last HIP called from the active host thread
- *
- * Returns the last error that has been returned by any of the runtime calls in the same host
- * thread, and then resets the saved error to #hipSuccess.
- *
- * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t
- */
-hipError_t hipGetLastError(void);
-
-
-/**
- * @brief Return last error returned by any HIP runtime API call.
- *
- * @return #hipSuccess
- *
- * Returns the last error that has been returned by any of the runtime calls in the same host
- * thread. Unlike hipGetLastError, this function does not reset the saved error code.
- *
- * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t
- */
-hipError_t hipPeekAtLastError(void);
-
-
-/**
- * @brief Return name of the specified error code in text form.
- *
- * @param hip_error Error code to convert to name.
- * @return const char pointer to the NULL-terminated error name
- *
- * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t
- */
-const char* hipGetErrorName(hipError_t hip_error);
-
-
-/**
- * @brief Return handy text string message to explain the error which occurred
- *
- * @param hipError Error code to convert to string.
- * @return const char pointer to the NULL-terminated error string
- *
- * @warning : on HCC, this function returns the name of the error (same as hipGetErrorName)
- *
- * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t
- */
-const char* hipGetErrorString(hipError_t hipError);
-
-// end doxygen Error
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup Stream Stream Management
- *  @{
- *  This section describes the stream management functions of HIP runtime API.
- *  The following Stream APIs are not (yet) supported in HIP:
- *  - cudaStreamAttachMemAsync
- */
-
-
-/**
- * @brief Create an asynchronous stream.
- *
- * @param[in, out] stream Valid pointer to hipStream_t.  This function writes the memory with the
- * newly created stream.
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream.  @p stream returns an opaque handle that can be used to
- * reference the newly created stream in subsequent hipStream* commands.  The stream is allocated on
- * the heap and will remain allocated even if the handle goes out-of-scope.  To release the memory
- * used by the stream, application must call hipStreamDestroy.
- *
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-hipError_t hipStreamCreate(hipStream_t* stream);
-
-
-/**
- * @brief Create an asynchronous stream.
- *
- * @param[in, out] stream Pointer to new stream
- * @param[in ] flags to control stream creation.
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream.  @p stream returns an opaque handle that can be used to
- * reference the newly created stream in subsequent hipStream* commands.  The stream is allocated on
- * the heap and will remain allocated even if the handle goes out-of-scope.  To release the memory
- * used by the stream, application must call hipStreamDestroy. Flags controls behavior of the
- * stream.  See #hipStreamDefault, #hipStreamNonBlocking.
- *
- *
- * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-
-hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags);
-
-
-/**
- * @brief Create an asynchronous stream with the specified priority.
- *
- * @param[in, out] stream Pointer to new stream
- * @param[in ] flags to control stream creation.
- * @param[in ] priority of the stream. Lower numbers represent higher priorities.
- * @return #hipSuccess, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream with the specified priority.  @p stream returns an opaque handle
- * that can be used to reference the newly created stream in subsequent hipStream* commands.  The
- * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope.
- * To release the memory used by the stream, application must call hipStreamDestroy. Flags controls
- * behavior of the stream.  See #hipStreamDefault, #hipStreamNonBlocking.
- *
- *
- * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-
-hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority);
-
-
-/**
- * @brief Returns numerical values that correspond to the least and greatest stream priority.
- *
- * @param[in, out] leastPriority pointer in which value corresponding to least priority is returned.
- * @param[in, out] greatestPriority pointer in which value corresponding to greatest priority is returned.
- *
- * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the least
- * and greatest stream priority respectively. Stream priorities follow a convention where lower numbers
- * imply greater priorities. The range of meaningful stream priorities is given by
- * [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value
- * that is outside the meaningful range as specified by this API, the priority is automatically
- * clamped to within the valid range.
- */
-
-hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
-
-
-/**
- * @brief Destroys the specified stream.
- *
- * @param[in] stream Stream to destroy.  The handle is passed by value, so nothing is written
- * back to the caller.
- * @return #hipSuccess, #hipErrorInvalidHandle
- *
- * Destroys the specified stream.
- *
- * If commands are still executing on the specified stream, some may complete execution before the
- * queue is deleted.
- *
- * The queue may be destroyed while some commands are still inflight, or may wait for all commands
- * queued to the stream before destroying it.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, hipStreamWaitEvent,
- * hipStreamSynchronize
- */
-hipError_t hipStreamDestroy(hipStream_t stream);
-
-
-/**
- * @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or
- * #hipErrorNotReady if not.
- *
- * @param[in] stream stream to query
- *
- * @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle
- *
- * This is thread-safe and returns a snapshot of the current state of the queue.  However, if other
- * host threads are sending work to the stream, the status may change immediately after the function
- * is called.  It is typically used for debug.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamSynchronize,
- * hipStreamDestroy
- */
-hipError_t hipStreamQuery(hipStream_t stream);
-
-
-/**
- * @brief Wait for all commands in stream to complete.
- *
- * @param[in] stream stream identifier.
- *
- * @return #hipSuccess, #hipErrorInvalidHandle
- *
- * This command is host-synchronous : the host will block until the specified stream is empty.
- *
- * This command follows standard null-stream semantics.  Specifically, specifying the null stream
- * will cause the command to wait for other streams on the same device to complete all pending
- * operations.
- *
- * This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active
- * or blocking.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamDestroy
- *
- */
-hipError_t hipStreamSynchronize(hipStream_t stream);
-
-
-/**
- * @brief Make the specified compute stream wait for an event
- *
- * @param[in] stream stream to make wait.
- * @param[in] event event to wait on
- * @param[in] flags control operation [must be 0]
- *
- * @return #hipSuccess, #hipErrorInvalidHandle
- *
- * This function inserts a wait operation into the specified stream.
- * All future work submitted to @p stream will wait until @p event reports completion before
- * beginning execution.
- *
- * This function only waits for commands in the current stream to complete.  Notably, this function
- * does not implicitly wait for commands in the default stream to complete, even if the specified
- * stream is created with hipStreamNonBlocking = 0.
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamDestroy
- */
-hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
-
-
-/**
- * @brief Return flags associated with this stream.
- *
- * @param[in] stream stream to be queried
- * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle
- *
- * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidHandle
- *
- * Return flags associated with this stream in *@p flags.
- *
- * @see hipStreamCreateWithFlags
- */
-hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags);
-
-
-/**
- * @brief Query the priority of a stream.
- *
- * @param[in] stream stream to be queried
- * @param[in,out] priority Pointer to an integer in which the stream's priority is returned
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle
- *
- * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidHandle
- *
- * Query the priority of a stream. The priority is returned in *@p priority.
- *
- * @see hipStreamCreateWithFlags
- */
-hipError_t hipStreamGetPriority(hipStream_t stream, int* priority);
-
-
-/**
- * @brief Create an asynchronous stream with the specified CU mask.
- *
- * @param[in, out] stream Pointer to new stream
- * @param[in ] cuMaskSize Size of CU mask bit array passed in.
- * @param[in ] cuMask Bit-vector representing the CU mask. Each active bit represents using one CU.
- * The first 32 bits represent the first 32 CUs, and so on. If its size is greater than physical
- * CU number (i.e., multiProcessorCount member of hipDeviceProp_t), the extra elements are ignored.
- * It is user's responsibility to make sure the input is meaningful.
- * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue
- *
- * Create a new asynchronous stream with the specified CU mask.  @p stream returns an opaque handle
- * that can be used to reference the newly created stream in subsequent hipStream* commands.  The
- * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope.
- * To release the memory used by the stream, application must call hipStreamDestroy.
- *
- *
- * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, const uint32_t* cuMask);
-
-
-/**
- * @brief Get CU mask associated with an asynchronous stream
- *
- * @param[in] stream stream to be queried
- * @param[in] cuMaskSize number of the block of memories (uint32_t *) allocated by user
- * @param[out] cuMask Pointer to a pre-allocated block of memories (uint32_t *) in which
- * the stream's CU mask is returned. The CU mask is returned in chunks of 32 bits where
- * each active bit represents one active CU
- * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue
- *
- * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
- */
-hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask);
-
-/**
- * Stream callback function pointer type
- */
-typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
-
-/**
- * @brief Adds a callback to be called on the host after all currently enqueued
- * items in the stream have completed.  For each
- * hipStreamAddCallback call, a callback will be executed exactly once.
- * The callback will block later work in the stream until it is finished.
- * @param[in] stream   - Stream to add callback to
- * @param[in] callback - The function to call once preceding stream operations are complete
- * @param[in] userData - User specified data to be passed to the callback function
- * @param[in] flags    - Reserved for future use, must be 0
- * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorNotSupported
- *
- * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize,
- * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority
- *
- */
-hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
-                                unsigned int flags);
-
-
-// end doxygen Stream
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup Event Event Management
- *  @{
- *  This section describes the event management functions of HIP runtime API.
- */
-
-/**
- * @brief Create an event with the specified flags
- *
- * @param[in,out] event Returns the newly created event.
- * @param[in] flags     Flags to control event behavior.  Valid values are #hipEventDefault,
- #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess
-
- * #hipEventDefault : Default flag.  The event will use active synchronization and will support
- timing.  Active synchronization provides lowest possible latency at the expense of dedicating a
- CPU to poll on the event.
- * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is
- called on this event, the thread will block until the event completes.  This can increase latency
- for the synchronization but can result in lower power and more resources for other CPU threads.
- * #hipEventDisableTiming : Disable recording of timing information.
-
- * @warning On AMD platform, hipEventInterprocess support is under development.  Use of this flag
- will return an error.
- *
- * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue,
- #hipErrorLaunchFailure, #hipErrorOutOfMemory
- *
- * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime
- */
-hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags);
-
-
-/**
- *  Create an event
- *
- * @param[in,out] event Returns the newly created event.
- *
- * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue,
- * #hipErrorLaunchFailure, #hipErrorOutOfMemory
- *
- * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize,
- * hipEventDestroy, hipEventElapsedTime
- */
-hipError_t hipEventCreate(hipEvent_t* event);
-
-
-/**
- * @brief Record an event in the specified stream.
- *
- * @param[in] event event to record.
- * @param[in] stream stream in which to record event.
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized,
- * #hipErrorInvalidHandle, #hipErrorLaunchFailure
- *
- * hipEventQuery() or hipEventSynchronize() must be used to determine when the event
- * transitions from "recording" (after hipEventRecord() is called) to "recorded"
- * (when timestamps are set, if requested).
- *
- * Events which are recorded in a non-NULL stream will transition
- * from "recording" to "recorded" state when they reach the head of
- * the specified stream, after all previous
- * commands in that stream have completed executing.
- *
- * If hipEventRecord() has been previously called on this event, then this call will overwrite any
- * existing state in event.
- *
- * If this function is called on an event that is currently being recorded, results are undefined
- * - either outstanding recording may save state into the event, and the order is not guaranteed.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize,
- * hipEventDestroy, hipEventElapsedTime
- *
- */
-#ifdef __cplusplus
-hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
-#else
-hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream);
-#endif
-
-/**
- *  @brief Destroy the specified event.
- *
- *  @param[in] event Event to destroy.
- *  @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue,
- * #hipErrorLaunchFailure
- *
- *  Releases memory associated with the event.  If the event is recording but has not completed
- * recording when hipEventDestroy() is called, the function will return immediately and the
- * completion_future resources will be released later, when the hipDevice is synchronized.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord,
- * hipEventElapsedTime
- *
- * @returns #hipSuccess
- */
-hipError_t hipEventDestroy(hipEvent_t event);
-
-
-/**
- *  @brief Wait for an event to complete.
- *
- *  This function will block until the event is ready, waiting for all previous work in the stream
- * specified when event was recorded with hipEventRecord().
- *
- *  If hipEventRecord() has not been called on @p event, this function returns immediately.
- *
- *  TODO-hip- This function needs to support hipEventBlockingSync parameter.
- *
- *  @param[in] event Event on which to wait.
- *  @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized,
- * #hipErrorInvalidHandle, #hipErrorLaunchFailure
- *
- *  @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord,
- * hipEventElapsedTime
- */
-hipError_t hipEventSynchronize(hipEvent_t event);
-
-
-/**
- * @brief Return the elapsed time between two events.
- *
- * @param[out] ms : Return time between start and stop in ms.
- * @param[in]   start : Start event.
- * @param[in]   stop  : Stop event.
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidHandle,
- * #hipErrorNotInitialized, #hipErrorLaunchFailure
- *
- * Computes the elapsed time between two events. Time is computed in ms, with
- * a resolution of approximately 1 us.
- *
- * Events which are recorded in a NULL stream will block until all commands
- * on all other streams complete execution, and then record the timestamp.
- *
- * Events which are recorded in a non-NULL stream will record their timestamp
- * when they reach the head of the specified stream, after all previous
- * commands in that stream have completed executing.  Thus the time that
- * the event recorded may be significantly after the host calls hipEventRecord().
- *
- * If hipEventRecord() has not been called on either event, then #hipErrorInvalidHandle is
- * returned. If hipEventRecord() has been called on both events, but the timestamp has not yet been
- * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at
- * least one of the events), then #hipErrorNotReady is returned.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord,
- * hipEventSynchronize
- */
-hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop);
-
-
-/**
- * @brief Query event status
- *
- * @param[in] event Event to query.
- * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue,
- * #hipErrorNotInitialized, #hipErrorLaunchFailure
- *
- * Query the status of the specified event.  This function will return #hipSuccess if all
- * commands in the appropriate stream (specified to hipEventRecord()) have completed.  If that work
- * has not completed, #hipErrorNotReady is returned.  NOTE(review): the result when
- * hipEventRecord() was never called on the event should be confirmed against the implementation.
- *
- * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy,
- * hipEventSynchronize, hipEventElapsedTime
- */
-hipError_t hipEventQuery(hipEvent_t event);
-
-
-// end doxygen Events
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup Memory Memory Management
- *  @{
- *  This section describes the memory management functions of HIP runtime API.
- *  The following CUDA APIs are not currently supported:
- *  - cudaMalloc3D
- *  - cudaMalloc3DArray
- *  - TODO - more 2D, 3D, array APIs here.
- *
- *
- */
-
-/**
- *  @brief Return attributes for the specified pointer
- *
- *  @param[out] attributes attributes for the specified pointer
- *  @param[in]  ptr pointer to get attributes for
- *
- *  @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- *
- *  @see hipGetDeviceCount, hipGetDevice, hipSetDevice, hipChooseDevice
- */
-hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr);
-
-/**
- *  @brief Allocate memory on the default accelerator
- *
- *  @param[out] ptr Pointer to the allocated memory
- *  @param[in]  size Requested memory size
- *
- *  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- *  @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr)
- *
- *  @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray,
- * hipHostFree, hipHostMalloc
- */
-hipError_t hipMalloc(void** ptr, size_t size);
-
-/**
- *  @brief Allocate memory on the default accelerator
- *
- *  @param[out] ptr Pointer to the allocated memory
- *  @param[in]  sizeBytes Requested memory size
- *  @param[in]  flags Type of memory allocation
- *
- *  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- *  @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr)
- *
- *  @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray,
- * hipHostFree, hipHostMalloc
- */
-hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags);
-
-/**
- *  @brief Allocate pinned host memory [Deprecated]
- *
- *  @param[out] ptr Pointer to the allocated host pinned memory
- *  @param[in]  size Requested memory size
- *
- *  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- *  @return #hipSuccess, #hipErrorOutOfMemory
- *
- *  @deprecated use hipHostMalloc() instead
- */
-DEPRECATED("use hipHostMalloc instead")
-hipError_t hipMallocHost(void** ptr, size_t size);
-
-/**
- *  @brief Allocate pinned host memory [Deprecated]
- *
- *  @param[out] ptr Pointer to the allocated host pinned memory
- *  @param[in]  size Requested memory size
- *
- *  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- *  @return #hipSuccess, #hipErrorOutOfMemory
- *
- *  @deprecated use hipHostMalloc() instead
- */
-DEPRECATED("use hipHostMalloc instead")
-hipError_t hipMemAllocHost(void** ptr, size_t size);
-
-/**
- *  @brief Allocate device accessible page locked host memory
- *
- *  @param[out] ptr Pointer to the allocated host pinned memory
- *  @param[in]  size Requested memory size
- *  @param[in]  flags Type of host memory allocation
- *
- *  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- *  @return #hipSuccess, #hipErrorOutOfMemory
- *
- *  @see hipSetDeviceFlags, hipHostFree
- */
-hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags);
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @addtogroup MemoryM Managed Memory (ROCm HMM)
- *  @{
- *  @ingroup Memory
- *  This section describes the managed memory management functions of HIP runtime API.
- *
- */
-
-/**
- * @brief Allocates memory that will be automatically managed by AMD HMM.
- *
- * @param [out] dev_ptr - pointer to allocated device memory
- * @param [in]  size    - requested allocation size in bytes
- * @param [in]  flags   - must be either hipMemAttachGlobal or hipMemAttachHost
- *                        (defaults to hipMemAttachGlobal)
- *
- * @returns #hipSuccess, #hipErrorMemoryAllocation, #hipErrorNotSupported, #hipErrorInvalidValue
- */
-hipError_t hipMallocManaged(void** dev_ptr,
-                            size_t size,
-                            unsigned int flags __dparm(hipMemAttachGlobal));
-
-/**
- * @brief Prefetches memory to the specified destination device using AMD HMM.
- *
- * @param [in] dev_ptr  pointer to be prefetched
- * @param [in] count    size in bytes for prefetching
- * @param [in] device   destination device to prefetch to
- * @param [in] stream   stream to enqueue prefetch operation
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemPrefetchAsync(const void* dev_ptr,
-                               size_t count,
-                               int device,
-                               hipStream_t stream __dparm(0));
-
-/**
- * @brief Advise about the usage of a given memory range to AMD HMM.
- *
- * @param [in] dev_ptr  pointer to memory to set the advice for
- * @param [in] count    size in bytes of the memory range
- * @param [in] advice   advice to be applied for the specified memory range
- * @param [in] device   device to apply the advice for
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemAdvise(const void* dev_ptr,
-                        size_t count,
-                        hipMemoryAdvise advice,
-                        int device);
-
-/**
- * @brief Query an attribute of a given memory range in AMD HMM.
- *
- * @param [in/out] data   a pointer to a memory location where the result of each
- *                        attribute query will be written to
- * @param [in] data_size  the size of data
- * @param [in] attribute  the attribute to query
- * @param [in] dev_ptr    start of the range to query
- * @param [in] count      size of the range to query
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemRangeGetAttribute(void* data,
-                                   size_t data_size,
-                                   hipMemRangeAttribute attribute,
-                                   const void* dev_ptr,
-                                   size_t count);
-
-/**
- * @brief Query attributes of a given memory range in AMD HMM.
- *
- * @param [in/out] data     a two-dimensional array containing pointers to memory locations
- *                          where the result of each attribute query will be written to
- * @param [in] data_sizes   an array, containing the sizes of each result
- * @param [in] attributes   an array of attributes to query
- * @param [in] num_attributes  the number of attributes to query (it should match the number
- *                          of entries in the @p attributes array)
- * @param [in] dev_ptr      start of the range to query
- * @param [in] count        size of the range to query
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipMemRangeGetAttributes(void** data,
-                                    size_t* data_sizes,
-                                    hipMemRangeAttribute* attributes,
-                                    size_t num_attributes,
-                                    const void* dev_ptr,
-                                    size_t count);
-
-/**
- * @brief Attach memory to a stream asynchronously in AMD HMM.
- *
- * @param [in] stream     - stream in which to enqueue the attach operation
- * @param [in] dev_ptr    - pointer to memory (must be a pointer to managed memory or
- *                          to a valid host-accessible region of system-allocated memory)
- * @param [in] length     - length of memory (defaults to zero)
- * @param [in] flags      - must be one of hipMemAttachGlobal, hipMemAttachHost or
- *                          hipMemAttachSingle (defaults to hipMemAttachSingle)
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipStreamAttachMemAsync(hipStream_t stream,
-                                   hipDeviceptr_t* dev_ptr,
-                                   size_t length __dparm(0),
-                                   unsigned int flags __dparm(hipMemAttachSingle));
-
-// end doxygen Managed Memory
-/**
- * @}
- */
-
-/**
- *  @brief Allocate device accessible page locked host memory [Deprecated]
- *
- *  @param[out] ptr Pointer to the allocated host pinned memory
- *  @param[in]  size Requested memory size
- *  @param[in]  flags Type of host memory allocation
- *
- *  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *
- *  @return #hipSuccess, #hipErrorOutOfMemory
- *
- *  @deprecated use hipHostMalloc() instead
- */
-DEPRECATED("use hipHostMalloc instead")
-hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags);
-
-/**
- *  @brief Get Device pointer from Host Pointer allocated through hipHostMalloc
- *
- *  @param[out] devPtr Device Pointer mapped to passed host pointer
- *  @param[in]  hstPtr Host Pointer allocated through hipHostMalloc
- *  @param[in]  flags Flags to be passed for extension
- *
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory
- *
- *  @see hipSetDeviceFlags, hipHostMalloc
- */
-hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags);
-
-/**
- *  @brief Return flags associated with host pointer
- *
- *  @param[out] flagsPtr Memory location to store flags
- *  @param[in]  hostPtr Host Pointer allocated through hipHostMalloc
- *  @return #hipSuccess, #hipErrorInvalidValue
- *
- *  @see hipHostMalloc
- */
-hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr);
-
-/**
- *  @brief Register host memory so it can be accessed from the current device.
- *
- *  @param[in] hostPtr Pointer to host memory to be registered.
- *  @param[in] sizeBytes size of the host memory
- *  @param[in] flags See below.
- *
- *  Flags:
- *  - #hipHostRegisterDefault   Memory is Mapped and Portable
- *  - #hipHostRegisterPortable  Memory is considered registered by all contexts.  HIP only supports
- * one context so this is always assumed true.
- *  - #hipHostRegisterMapped    Map the allocation into the address space for the current device.
- * The device pointer can be obtained with #hipHostGetDevicePointer.
- *
- *
- *  After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer.
- *  On many systems, the mapped device pointer will have a different value than the mapped host
- * pointer.  Applications must use the device pointer in device code, and the host pointer in host
- * code.
- *
- *  On some systems, registered memory is pinned.  On some systems, registered memory may not
- * actually be pinned but uses OS or hardware facilities to allow GPU access to the host memory.
- *
- *  Developers are strongly encouraged to register memory blocks which are aligned to the host
- * cache-line size. (typically 64-bytes but can be obtains from the CPUID instruction).
- *
- *  If registering non-aligned pointers, the application must take care when register pointers from
- * the same cache line on different devices.  HIP's coarse-grained synchronization model does not
- * guarantee correct results if different devices write to different parts of the same cache block -
- * typically one of the writes will "win" and overwrite data from the other registered memory
- * region.
- *
- *  @return #hipSuccess, #hipErrorOutOfMemory
- *
- *  @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer
- */
-hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags);
-
-/**
- *  @brief Un-register host pointer
- *
- *  @param[in] hostPtr Host pointer previously registered with #hipHostRegister
- *  @return Error code
- *
- *  @see hipHostRegister
- */
-hipError_t hipHostUnregister(void* hostPtr);
-
-/**
- *  @brief Allocates at least width (in bytes) * height bytes of linear memory.
- *
- *  Padding may occur to ensure alignment requirements are met for the given row.
- *  The change in width size due to padding will be returned in *pitch.
- *  Currently the alignment is set to 128 bytes.
- *
- *  @param[out] ptr Pointer to the allocated device memory
- *  @param[out] pitch Pitch for allocation (in bytes)
- *  @param[in]  width Requested pitched allocation width (in bytes)
- *  @param[in]  height Requested pitched allocation height
- *
- *  If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
- *  NOTE(review): "size" is not a parameter of this function; presumably it refers to the
- *  requested width * height - confirm against the implementation.
- *
- *  @return Error code
- *
- *  @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-
-hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height);
-
-/**
- *  @brief Allocates at least widthInBytes * height bytes of linear memory.
- *
- *  Padding may occur to ensure alignment requirements are met for the given row.
- *  The change in width size due to padding will be returned in *pitch.
- *  Currently the alignment is set to 128 bytes.
- *
- *  @param[out] dptr Pointer to the allocated device memory
- *  @param[out] pitch Pitch for allocation (in bytes)
- *  @param[in]  widthInBytes Requested pitched allocation width (in bytes)
- *  @param[in]  height Requested pitched allocation height
- *  @param[in]  elementSizeBytes Not described by the original documentation; presumably the size
- *              in bytes of one array element - TODO confirm how the implementation uses it.
- *
- *  If size is 0, no memory is allocated, *dptr returns nullptr, and hipSuccess is returned.
- *  NOTE(review): "size" is not a parameter of this function; presumably it refers to the
- *  requested widthInBytes * height - confirm against the implementation.
- *  The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array.
- *  Given the row and column of an array element of type T, the address is computed as:
- *  T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
- *
- *  @return Error code
- *
- *  @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-
-hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes);
-
-/**
- *  @brief Free memory allocated by the hcc hip memory allocation API.
- *  This API performs an implicit hipDeviceSynchronize() call.
- *  If pointer is NULL, the hip runtime is initialized and hipSuccess is returned.
- *
- *  @param[in] ptr Pointer to memory to be freed
- *  @return #hipSuccess
- *  @return #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated
- * with hipHostMalloc)
- *
- *  @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-hipError_t hipFree(void* ptr);
-
-/**
- *  @brief Free memory allocated by the hcc hip host memory allocation API.  [Deprecated]
- *
- *  @param[in] ptr Pointer to memory to be freed
- *  @return #hipSuccess,
- *          #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with
- * hipMalloc)
- *
- *  @deprecated use hipHostFree() instead
- */
-DEPRECATED("use hipHostFree instead")
-hipError_t hipFreeHost(void* ptr);
-
-/**
- *  @brief Free memory allocated by the hcc hip host memory allocation API
- *  This API performs an implicit hipDeviceSynchronize() call.
- *  If pointer is NULL, the hip runtime is initialized and hipSuccess is returned.
- *
- *  @param[in] ptr Pointer to memory to be freed
- *  @return #hipSuccess,
- *          #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with
- * hipMalloc)
- *
- *  @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D,
- * hipMalloc3DArray, hipHostMalloc
- */
-hipError_t hipHostFree(void* ptr);
-
-/**
- *  @brief Copy data from src to dst.
- *
- *  It supports memory from host to device,
- *  device to host, device to device and host to host.
- *  The src and dst must not overlap.
- *
- *  For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice).
- *  For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the
- *  device where the src data is physically located. For optimal peer-to-peer copies, the copy device
- *  must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy
- *  agent as the current device and src/dest as the peerDevice argument).  If this is not done, the
- *  hipMemcpy will still work, but will perform the copy using a staging buffer on the host.
- *  Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in
- *  undefined behavior.
- *
- *  @param[out]  dst Data being copied to
- *  @param[in]  src Data being copied from
- *  @param[in]  sizeBytes Data size in bytes
- *  @param[in]  kind Memory copy type
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- *
- *  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
-
-/**
- *  TODO: Add description.
- *
- *  NOTE(review): the parameter list is that of hipMemcpy() plus a trailing @p stream; presumably
- *  the copy is issued on that stream - confirm against the implementation before documenting.
- */
-hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes,
-                               hipMemcpyKind kind, hipStream_t stream);
-/**
- *  @brief Copy data from Host to Device
- *
- *  @param[out]  dst Data being copy to
- *  @param[in]   src Data being copy from
- *  @param[in]   sizeBytes Data size in bytes
- *
- *  @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- *  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes);
-
-/**
- *  @brief Copy data from Device to Host
- *
- *  @param[out]  dst Data being copy to
- *  @param[in]   src Data being copy from
- *  @param[in]   sizeBytes Data size in bytes
- *
- *  @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- *  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes);
-
-/**
- *  @brief Copy data from Device to Device
- *
- *  @param[out]  dst Data being copy to
- *  @param[in]   src Data being copy from
- *  @param[in]   sizeBytes Data size in bytes
- *
- *  @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- *  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes);
-
-/**
- *  @brief Copy data from Host to Device asynchronously
- *
- *  @param[out]  dst Data being copied to
- *  @param[in]   src Data being copied from
- *  @param[in]   sizeBytes Data size in bytes
- *  @param[in]   stream Stream identifier
- *
- *  @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- *  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream);
-
-/**
- *  @brief Copy data from Device to Host asynchronously
- *
- *  @param[out]  dst Data being copied to
- *  @param[in]   src Data being copied from
- *  @param[in]   sizeBytes Data size in bytes
- *  @param[in]   stream Stream identifier
- *
- *  @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- *  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream);
-
-/**
- *  @brief Copy data from Device to Device asynchronously
- *
- *  @param[out]  dst Data being copied to
- *  @param[in]   src Data being copied from
- *  @param[in]   sizeBytes Data size in bytes
- *  @param[in]   stream Stream identifier
- *
- *  @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
- * #hipErrorInvalidValue
- *
- *  @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
- * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
- * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
- * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
- * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
- * hipMemHostAlloc, hipMemHostGetDevicePointer
- */
-hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes,
-                              hipStream_t stream);
-
-#if __HIP_ROCclr__
-hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
-    hipModule_t hmod, const char* name);
-
-hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol);
-hipError_t hipGetSymbolSize(size_t* size, const void* symbol);
-hipError_t hipMemcpyToSymbol(const void* symbol, const void* src,
-                             size_t sizeBytes, size_t offset __dparm(0),
-                             hipMemcpyKind kind __dparm(hipMemcpyHostToDevice));
-hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
-                                  size_t sizeBytes, size_t offset,
-                                  hipMemcpyKind kind, hipStream_t stream __dparm(0));
-hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol,
-                               size_t sizeBytes, size_t offset __dparm(0),
-                               hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost));
-hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol,
-                                    size_t sizeBytes, size_t offset,
-                                    hipMemcpyKind kind,
-                                    hipStream_t stream __dparm(0));
-#else
-hipError_t hipModuleGetGlobal(void**, size_t*, hipModule_t, const char*);
-
-#ifdef __cplusplus //Start : Not supported in gcc
-namespace hip_impl {
-inline
-__attribute__((visibility("hidden")))
-hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
-                                          const char* name);
-} // Namespace hip_impl.
-
-
-/**
- *  @brief Copies the memory address of symbol @p symbolName to @p devPtr
- *
- * @param[in]  symbolName - Symbol on device
- * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol
- * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
- *
- * Implementation note: after calling hip_impl::hip_init(), @p symbolName is cast to const char*
- * and forwarded to hip_impl::read_agent_global_from_process(), whose result is returned as-is.
- * The size that helper reports is written to a local variable and discarded.
- *
- *  @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
-    // HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName); -- disabled; hip_init() is called directly instead
-    hip_impl::hip_init();
-    size_t size = 0;
-    return hip_impl::read_agent_global_from_process(devPtr, &size, (const char*)symbolName);
-}
-
-
-/**
- *  @brief Copies the size of symbol @p symbolName to @p size
- *
- * @param[in]  symbolName - Symbol on device
- * @param[out] size - Pointer to the size of the symbol
- * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
- *
- * Implementation note: after calling hip_impl::hip_init(), @p symbolName is cast to const char*
- * and forwarded to hip_impl::read_agent_global_from_process(), whose result is returned as-is.
- * The address that helper reports is written to a local variable and discarded.
- *
- *  @see hipGetSymbolAddress, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
-    // HIP_INIT_API(hipGetSymbolSize, size, symbolName); -- disabled; hip_init() is called directly instead
-    hip_impl::hip_init();
-    void* devPtr = nullptr;
-    return hip_impl::read_agent_global_from_process(&devPtr, size, (const char*)symbolName);
-}
-#endif // End : Not supported in gcc
-
-#if defined(__cplusplus)
-} // extern "C"
-#endif
-
-#ifdef __cplusplus
-namespace hip_impl {
-hipError_t hipMemcpyToSymbol(void*, const void*, size_t, size_t, hipMemcpyKind,
-                             const char*);
-} // Namespace hip_impl.
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-/**
- *  @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
- * pointed to by @p offset bytes from the start of symbol @p symbolName.
- *
- *  The memory areas may not overlap. Symbol can either be a variable that resides in global or
- * constant memory space, or it can be a character string, naming a variable that resides in global
- * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
- *  TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use
- * hipErrorUnknown for now.
- *  NOTE(review): the inline C++ wrapper below does return #hipErrorInvalidSymbol when
- * @p symbolName is null, so the TODO above looks partly stale - confirm.
- *  NOTE(review): the status returned by hipGetSymbolAddress() is not checked, so after a failed
- * lookup the address forwarded to hip_impl::hipMemcpyToSymbol() may still be NULL - confirm that
- * the helper tolerates this.
- *
- *  @param[in]  symbolName - Symbol destination on device
- *  @param[in]  src - Data being copied from
- *  @param[in]  sizeBytes - Data size in bytes
- *  @param[in]  offset - Offset from start of symbol in bytes
- *  @param[in]  kind - Type of transfer
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidSymbol, #hipErrorMemoryFree,
- * #hipErrorUnknown
- *
- *  @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol,
- * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
- * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-#ifdef __cplusplus
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src,
-                             size_t sizeBytes, size_t offset __dparm(0),
-                             hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) {
-    if (!symbolName) return hipErrorInvalidSymbol;
-
-    hipDeviceptr_t dst = NULL;
-    hipGetSymbolAddress(&dst, (const char*)symbolName);
-
-    return hip_impl::hipMemcpyToSymbol(dst, src, sizeBytes, offset, kind,
-                                       (const char*)symbolName);
-}
-#endif
-
-#if defined(__cplusplus)
-} // extern "C"
-#endif
-
-#ifdef __cplusplus
-namespace hip_impl {
-hipError_t hipMemcpyToSymbolAsync(void*, const void*, size_t, size_t,
-                                  hipMemcpyKind, hipStream_t, const char*);
-hipError_t hipMemcpyFromSymbol(void*, const void*, size_t, size_t,
-                               hipMemcpyKind, const char*);
-hipError_t hipMemcpyFromSymbolAsync(void*, const void*, size_t, size_t,
-                                    hipMemcpyKind, hipStream_t, const char*);
-} // Namespace hip_impl.
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-/**
- *  @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
- * pointed to by @p offset bytes from the start of symbol @p symbolName
- *
- *  The memory areas may not overlap. Symbol can either be a variable that resides in global or
- * constant memory space, or it can be a character string, naming a variable that resides in global
- * or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
- *  hipMemcpyToSymbolAsync() is asynchronous with respect to the host, so the call may return before
- * copy is complete.
- *  TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use
- * hipErrorUnknown for now.
- *  NOTE(review): the wrappers below return #hipErrorInvalidSymbol for a null @p symbolName, so
- * the TODO above looks partly stale - confirm.
- *
- *  @param[in]  symbolName - Symbol destination on device
- *  @param[in]  src - Data being copied from
- *  @param[in]  sizeBytes - Data size in bytes
- *  @param[in]  offset - Offset from start of symbol in bytes
- *  @param[in]  kind - Type of transfer
- *  @param[in]  stream - Stream identifier
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidSymbol, #hipErrorMemoryFree,
- * #hipErrorUnknown
- *
- *  The same C++-only group also defines hipMemcpyFromSymbol() and hipMemcpyFromSymbolAsync().
- * They take the destination @p dst in place of @p src, apply the same null check on
- * @p symbolName, and forward to the hip_impl helper of the same name; hipMemcpyFromSymbol()
- * has no @p stream parameter.
- *  NOTE(review): none of the three wrappers checks the status returned by
- * hipGetSymbolAddress() before forwarding the looked-up address - confirm that the hip_impl
- * helpers tolerate a failed lookup.
- *
- *  @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol,
- * hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
- * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-
-#ifdef __cplusplus //Start : Not supported in gcc
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src,
-                                  size_t sizeBytes, size_t offset,
-                                  hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
-    if (!symbolName) return hipErrorInvalidSymbol;
-
-    hipDeviceptr_t dst = NULL;
-    hipGetSymbolAddress(&dst, symbolName);
-
-    return hip_impl::hipMemcpyToSymbolAsync(dst, src, sizeBytes, offset, kind,
-                                            stream,
-                                            (const char*)symbolName);
-}
-
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName,
-                               size_t sizeBytes, size_t offset __dparm(0),
-                               hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
-    if (!symbolName) return hipErrorInvalidSymbol;
-
-    hipDeviceptr_t src = NULL;
-    hipGetSymbolAddress(&src, symbolName);
-
-    return hip_impl::hipMemcpyFromSymbol(dst, src, sizeBytes, offset, kind,
-                                         (const char*)symbolName);
-}
-
-inline
-__attribute__((visibility("hidden")))
-hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
-                                    size_t sizeBytes, size_t offset,
-                                    hipMemcpyKind kind,
-                                    hipStream_t stream __dparm(0)) {
-    if (!symbolName) return hipErrorInvalidSymbol;
-
-    hipDeviceptr_t src = NULL;
-    hipGetSymbolAddress(&src, symbolName);
-
-    return hip_impl::hipMemcpyFromSymbolAsync(dst, src, sizeBytes, offset, kind,
-                                              stream,
-                                              (const char*)symbolName);
-}
-#endif // End : Not supported in gcc
-
-#endif // __HIP_ROCclr__
-/**
- *  @brief Copy data from src to dst asynchronously.
- *
- *  @warning If host or dest are not pinned, the memory copy will be performed synchronously.  For
- * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously.
- *
- *  @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
- *  For hipMemcpyAsync, the copy is always performed by the device associated with the specified
- * stream.
- *
- *  For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is
- * attached to the device where the src data is physically located. For optimal peer-to-peer copies,
- * the copy device must be able to access the src and dst pointers (by calling
- * hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice
- * argument).  If this is not done, the copy will still work, but will be performed using a
- * staging buffer on the host.
- *
- *  @param[out] dst Data being copied to
- *  @param[in]  src Data being copied from
- *  @param[in]  sizeBytes Data size in bytes
- *  @param[in]  kind Memory copy type
- *  @param[in]  stream Stream on which the copy is being enqueued
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- *
- *  @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol,
- * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
- * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
- * hipMemcpyFromSymbolAsync
- */
-hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
-                          hipStream_t stream __dparm(0));
-
-/**
- *  @brief Fills the first @p sizeBytes bytes of the memory area pointed to by @p dst with the
- * constant byte value @p value.
- *
- *  @param[out] dst Data being filled
- *  @param[in]  value Constant value to be set
- *  @param[in]  sizeBytes Data size in bytes
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemset(void* dst, int value, size_t sizeBytes);
-
-/**
- *  @brief Fills the first @p count bytes of the memory area pointed to by @p dest with the
- * constant byte value @p value.
- *
- *  @param[out] dest Data ptr to be filled
- *  @param[in]  value Constant value to be set
- *  @param[in]  count Number of values to be set
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count);
-
-/**
- *  @brief Fills the first @p count bytes of the memory area pointed to by @p dest with the
- * constant byte value @p value.
- *
- * hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- *  @param[out] dest Data ptr to be filled
- *  @param[in]  value Constant value to be set
- *  @param[in]  count Number of values to be set
- *  @param[in]  stream - Stream identifier
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0));
-
-/**
- *  @brief Fills @p count short values of the memory area pointed to by @p dest with the constant
- * short value @p value.
- *
- *  @param[out] dest Data ptr to be filled
- *  @param[in]  value Constant value to be set
- *  @param[in]  count Number of values to be set
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count);
-
-/**
- *  @brief Fills @p count short values of the memory area pointed to by @p dest with the constant
- * short value @p value.
- *
- * hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- *  @param[out] dest Data ptr to be filled
- *  @param[in]  value Constant value to be set
- *  @param[in]  count Number of values to be set
- *  @param[in]  stream - Stream identifier
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0));
-
-/**
- *  @brief Fills the memory area pointed to by @p dest with the constant integer
- * value @p value, repeated @p count times.
- *
- *  @param[out] dest Data being filled
- *  @param[in]  value Constant value to be set
- *  @param[in]  count Number of values to be set
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- */
-hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count);
-
-/**
- *  @brief Fills the first @p sizeBytes bytes of the memory area pointed to by @p dst with the
- * constant byte value @p value.
- *
- *  hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- *  @param[out] dst Pointer to device memory
- *  @param[in]  value - Value to set for each byte of specified memory
- *  @param[in]  sizeBytes - Size in bytes to set
- *  @param[in]  stream - Stream identifier
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0));
-
-/**
- *  @brief Fills the memory area pointed to by @p dst with the constant integer
- * value @p value, repeated @p count times.
- *
- *  hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the
- * memset is complete. The operation can optionally be associated to a stream by passing a non-zero
- * stream argument. If stream is non-zero, the operation may overlap with operations in other
- * streams.
- *
- *  @param[out] dst Pointer to device memory
- *  @param[in]  value - Constant integer value to be set
- *  @param[in]  count - number of values to be set
- *  @param[in]  stream - Stream identifier
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count,
-                             hipStream_t stream __dparm(0));
-
-/**
- *  @brief Fills the memory area pointed to by dst with the constant value.
- *
- *  @param[out] dst Pointer to device memory
- *  @param[in]  pitch - data size in bytes
- *  @param[in]  value - constant value to be set
- *  @param[in]  width
- *  @param[in]  height
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-
-hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height);
-
-/**
- *  @brief Fills asynchronously the memory area pointed to by dst with the constant value.
- *
- *  @param[in]  dst Pointer to device memory
- *  @param[in]  pitch - data size in bytes
- *  @param[in]  value - constant value to be set
- *  @param[in]  width
- *  @param[in]  height
- *  @param[in]  stream
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-
-hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height,hipStream_t stream __dparm(0));
-
-/**
- *  @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
- *
- *  @param[in] pitchedDevPtr
- *  @param[in]  value - constant value to be set
- *  @param[in]  extent
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent );
-
-/**
- *  @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
- *
- *  @param[in] pitchedDevPtr
- *  @param[in]  value - constant value to be set
- *  @param[in]  extent
- *  @param[in]  stream
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
- */
-hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent ,hipStream_t stream __dparm(0));
-
-/**
- * @brief Query memory info.
- * Return snapshot of free memory, and total allocatable memory on the device.
- *
- * Returns in *free a snapshot of the current free memory.
- * @param[out] free  Receives the snapshot of free memory
- * @param[out] total Receives the total allocatable memory on the device
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
- * @warning On HCC, the free memory only accounts for memory allocated by this process and may be
- * optimistic.
- **/
-hipError_t hipMemGetInfo(size_t* free, size_t* total);
-
-
-hipError_t hipMemPtrGetInfo(void* ptr, size_t* size);
-
-
-/**
- *  @brief Allocate an array on the device.
- *
- *  @param[out]  array  Pointer to allocated array in device memory
- *  @param[in]   desc   Requested channel format
- *  @param[in]   width  Requested array allocation width
- *  @param[in]   height Requested array allocation height
- *  @param[in]   flags  Requested properties of allocated array
- *  @return      #hipSuccess, #hipErrorOutOfMemory
- *
- *  @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree
- */
-hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width,
-                          size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault));
-hipError_t hipArrayCreate(hipArray** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray);
-
-hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray);
-
-hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent);
-
-/**
- *  @brief Frees an array on the device.
- *
- *  @param[in]  array  Pointer to array to free
- *  @return     #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
- *
- *  @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree
- */
-hipError_t hipFreeArray(hipArray* array);
-
-/**
- * @brief Frees a mipmapped array on the device
- *
- * @param[in] mipmappedArray - Pointer to mipmapped array to free
- *
- * @return #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray);
-
-/**
- *  @brief Allocate an array on the device.
- *
- *  @param[out]  array  Pointer to allocated array in device memory
- *  @param[in]   desc   Requested channel format
- *  @param[in]   extent Requested array allocation width, height and depth
- *  @param[in]   flags  Requested properties of allocated array
- *  @return      #hipSuccess, #hipErrorOutOfMemory
- *
- *  @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree
- */
-
-hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc,
-                            struct hipExtent extent, unsigned int flags);
-
-/**
- * @brief Allocate a mipmapped array on the device
- *
- * @param[out] mipmappedArray  - Pointer to allocated mipmapped array in device memory
- * @param[in]  desc            - Requested channel format
- * @param[in]  extent          - Requested allocation size (width field in elements)
- * @param[in]  numLevels       - Number of mipmap levels to allocate
- * @param[in]  flags           - Flags for extensions
- *
- * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation
- */
-hipError_t hipMallocMipmappedArray(
-    hipMipmappedArray_t *mipmappedArray,
-    const struct hipChannelFormatDesc* desc,
-    struct hipExtent extent,
-    unsigned int numLevels,
-    unsigned int flags __dparm(0));
-
-/**
- * @brief Gets a mipmap level of a HIP mipmapped array
- *
- * @param[out] levelArray     - Returned mipmap level HIP array
- * @param[in]  mipmappedArray - HIP mipmapped array
- * @param[in]  level          - Mipmap level
- *
- * @return #hipSuccess, #hipErrorInvalidValue
- */
-hipError_t hipGetMipmappedArrayLevel(
-    hipArray_t *levelArray,
-    hipMipmappedArray_const_t mipmappedArray,
-    unsigned int level);
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dst    Destination memory address
- *  @param[in]   dpitch Pitch of destination memory
- *  @param[in]   src    Source memory address
- *  @param[in]   spitch Pitch of source memory
- *  @param[in]   width  Width of matrix transfer (columns in bytes)
- *  @param[in]   height Height of matrix transfer (rows)
- *  @param[in]   kind   Type of transfer
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
-                       size_t height, hipMemcpyKind kind);
-
-/**
- *  @brief Copies memory for 2D arrays.
- *  @param[in]   pCopy Parameters for the memory copy
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- *  #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpyToSymbol, hipMemcpyAsync
-*/
-hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy);
-
-/**
- *  @brief Copies memory for 2D arrays.
- *  @param[in]   pCopy Parameters for the memory copy
- *  @param[in]   stream Stream to use
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
- * hipMemcpyToSymbol, hipMemcpyAsync
-*/
-hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0));
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dst    Destination memory address
- *  @param[in]   dpitch Pitch of destination memory
- *  @param[in]   src    Source memory address
- *  @param[in]   spitch Pitch of source memory
- *  @param[in]   width  Width of matrix transfer (columns in bytes)
- *  @param[in]   height Height of matrix transfer (rows)
- *  @param[in]   kind   Type of transfer
- *  @param[in]   stream Stream to use
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
-                            size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0));
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dst     Destination memory address
- *  @param[in]   wOffset Destination starting X offset
- *  @param[in]   hOffset Destination starting Y offset
- *  @param[in]   src     Source memory address
- *  @param[in]   spitch  Pitch of source memory
- *  @param[in]   width   Width of matrix transfer (columns in bytes)
- *  @param[in]   height  Height of matrix transfer (rows)
- *  @param[in]   kind    Type of transfer
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
-                              size_t spitch, size_t width, size_t height, hipMemcpyKind kind);
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dst     Destination memory address
- *  @param[in]   wOffset Destination starting X offset
- *  @param[in]   hOffset Destination starting Y offset
- *  @param[in]   src     Source memory address
- *  @param[in]   count   size in bytes to copy
- *  @param[in]   kind    Type of transfer
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
-                            size_t count, hipMemcpyKind kind);
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dst       Destination memory address
- *  @param[in]   srcArray  Source memory address
- *  @param[in]   woffset   Source starting X offset
- *  @param[in]   hOffset   Source starting Y offset
- *  @param[in]   count     Size in bytes to copy
- *  @param[in]   kind      Type of transfer
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset,
-                              size_t count, hipMemcpyKind kind);
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dst       Destination memory address
- *  @param[in]   dpitch    Pitch of destination memory
- *  @param[in]   src       Source memory address
- *  @param[in]   wOffset   Source starting X offset
- *  @param[in]   hOffset   Source starting Y offset
- *  @param[in]   width     Width of matrix transfer (columns in bytes)
- *  @param[in]   height    Height of matrix transfer (rows)
- *  @param[in]   kind      Type of transfer
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind);
-
-/**
- *  @brief Copies data between host and device asynchronously.
- *
- *  @param[in]   dst       Destination memory address
- *  @param[in]   dpitch    Pitch of destination memory
- *  @param[in]   src       Source memory address
- *  @param[in]   wOffset   Source starting X offset
- *  @param[in]   hOffset   Source starting Y offset
- *  @param[in]   width     Width of matrix transfer (columns in bytes)
- *  @param[in]   height    Height of matrix transfer (rows)
- *  @param[in]   kind      Type of transfer
- *  @param[in]   stream    Accelerator view which the copy is being enqueued
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0));
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dst       Destination memory address
- *  @param[in]   srcArray  Source array
- *  @param[in]   srcoffset Offset in bytes of source array
- *  @param[in]   count     Size of memory copy in bytes
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count);
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   dstArray   Destination memory address
- *  @param[in]   dstOffset  Offset in bytes of destination array
- *  @param[in]   srcHost    Source host pointer
- *  @param[in]   count      Size of memory copy in bytes
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count);
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   p   3D memory copy parameters
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p);
-
-/**
- *  @brief Copies data between host and device asynchronously.
- *
- *  @param[in]   p        3D memory copy parameters
- *  @param[in]   stream   Stream to use
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0));
-
-/**
- *  @brief Copies data between host and device.
- *
- *  @param[in]   pCopy   3D memory copy parameters
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- *  #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy);
-
-/**
- *  @brief Copies data between host and device asynchronously.
- *
- *  @param[in]   pCopy    3D memory copy parameters
- *  @param[in]   stream   Stream to use
- *  @return      #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
- *  #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
- *
- *  @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
- * hipMemcpyAsync
- */
-hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream);
-
-// doxygen end Memory
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup PeerToPeer PeerToPeer Device Memory Access
- *  @{
- *  @warning PeerToPeer support is experimental.
- *  This section describes the PeerToPeer device memory access functions of HIP runtime API.
- */
-
-/**
- * @brief Determine if a device can access a peer's memory.
- *
- * @param [out] canAccessPeer Returns the peer access capability (0 or 1)
- * @param [in] device - device from where memory may be accessed.
- * @param [in] peerDevice - device where memory is physically located
- *
- * Returns "1" in @p canAccessPeer if the specified @p device is capable
- * of directly accessing memory physically located on peerDevice , or "0" if not.
- *
- * Returns "0" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a
- * device is not a peer of itself.
- *
- * @returns #hipSuccess,
- * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices
- */
-hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId);
-
-
-/**
- * @brief Enable direct access from current device's virtual address space to memory allocations
- * physically located on a peer device.
- *
- * Memory which already allocated on peer device will be mapped into the address space of the
- * current device.  In addition, all future memory allocations on peerDeviceId will be mapped into
- * the address space of the current device when the memory is allocated. The peer memory remains
- * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.
- *
- *
- * @param [in] peerDeviceId
- * @param [in] flags
- *
- * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,
- * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device.
- */
-hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags);
-
-
-/**
- * @brief Disable direct access from current device's virtual address space to memory allocations
- * physically located on a peer device.
- *
- * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been
- * enabled from the current device.
- *
- * @param [in] peerDeviceId
- *
- * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled
- */
-hipError_t hipDeviceDisablePeerAccess(int peerDeviceId);
-
-/**
- * @brief Get information on memory allocations.
- *
- * @param [out] pbase - BAse pointer address
- * @param [out] psize - Size of allocation
- * @param [in]  dptr- Device Pointer
- *
- * @returns #hipSuccess, #hipErrorInvalidDevicePointer
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr);
-
-#ifndef USE_PEER_NON_UNIFIED
-#define USE_PEER_NON_UNIFIED 1
-#endif
-
-#if USE_PEER_NON_UNIFIED == 1
-/**
- * @brief Copies memory from one device to memory on another device.
- *
- * @param [out] dst - Destination device pointer.
- * @param [in] dstDeviceId - Destination device
- * @param [in] src - Source device pointer
- * @param [in] srcDeviceId - Source device
- * @param [in] sizeBytes - Size of memory copy in bytes
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
- */
-hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId,
-                         size_t sizeBytes);
-
-/**
- * @brief Copies memory from one device to memory on another device.
- *
- * @param [out] dst - Destination device pointer.
- * @param [in] dstDevice - Destination device
- * @param [in] src - Source device pointer
- * @param [in] srcDevice - Source device
- * @param [in] sizeBytes - Size of memory copy in bytes
- * @param [in] stream - Stream identifier
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
- */
-hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice,
-                              size_t sizeBytes, hipStream_t stream __dparm(0));
-#endif
-
-
-// doxygen end PeerToPeer
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup Context Context Management
- *  @{
- *  This section describes the context management functions of HIP runtime API.
- */
-
-/**
- *
- *  @addtogroup ContextD Context Management [Deprecated]
- *  @{
- *  @ingroup Context
- *  This section describes the deprecated context management functions of HIP runtime API.
- */
-
-/**
- * @brief Create a context and set it as current/ default context
- *
- * @param [out] ctx
- * @param [in] flags
- * @param [in] associated device handle
- *
- * @return #hipSuccess
- *
- * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent,
- * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device);
-
-/**
- * @brief Destroy a HIP context.
- *
- * @param [in] ctx Context to destroy
- *
- * @returns #hipSuccess, #hipErrorInvalidValue
- *
- * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxDestroy(hipCtx_t ctx);
-
-/**
- * @brief Pop the current/default context and return the popped context.
- *
- * @param [out] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxPopCurrent(hipCtx_t* ctx);
-
-/**
- * @brief Push the context to be set as current/ default context
- *
- * @param [in] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxPushCurrent(hipCtx_t ctx);
-
-/**
- * @brief Set the passed context as current/default
- *
- * @param [in] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSetCurrent(hipCtx_t ctx);
-
-/**
- * @brief Get the handle of the current/ default context
- *
- * @param [out] ctx
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetCurrent(hipCtx_t* ctx);
-
-/**
- * @brief Get the handle of the device associated with current/default context
- *
- * @param [out] device
- *
- * @returns #hipSuccess, #hipErrorInvalidContext
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize
- */
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetDevice(hipDevice_t* device);
-
-/**
- * @brief Returns the approximate HIP api version.
- *
- * @param [in]  ctx Context to check
- * @param [out] apiVersion
- *
- * @return #hipSuccess
- *
- * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision.
- * This function always set *apiVersion to 4 as an approximation though HIP supports
- * some features which were introduced in later CUDA SDK revisions.
- * HIP apps code should not rely on the api revision number here and should
- * use arch feature flags to test device capabilities or conditional compilation.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent,
- * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion);
-
-/**
- * @brief Set Cache configuration for a specific function
- *
- * @param [out] cacheConfiguration
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig);
-
-/**
- * @brief Set L1/Shared cache partition.
- *
- * @param [in] cacheConfiguration
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache.  This hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig);
-
-/**
- * @brief Set Shared memory bank configuration.
- *
- * @param [in] sharedMemoryConfiguration
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config);
-
-/**
- * @brief Get Shared memory bank configuration.
- *
- * @param [out] sharedMemoryConfiguration
- *
- * @return #hipSuccess
- *
- * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
- * ignored on those architectures.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig);
-
-/**
- * @brief Blocks until the default context has completed all preceding requested tasks.
- *
- * @return #hipSuccess
- *
- * @warning This function waits for all streams on the default context to complete execution, and
- * then returns.
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxSynchronize(void);
-
-/**
- * @brief Return flags used for creating default context.
- *
- * @param [out] flags
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxGetFlags(unsigned int* flags);
-
-/**
- * @brief Enables direct access to memory allocations in a peer context.
- *
- * Memory which already allocated on peer device will be mapped into the address space of the
- * current device.  In addition, all future memory allocations on peerDeviceId will be mapped into
- * the address space of the current device when the memory is allocated. The peer memory remains
- * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset.
- *
- *
- * @param [in] peerCtx
- * @param [in] flags
- *
- * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue,
- * #hipErrorPeerAccessAlreadyEnabled
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- * @warning PeerToPeer support is experimental.
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags);
-
-/**
- * @brief Disable direct access from current context's virtual address space to memory allocations
- * physically located on a peer context.Disables direct access to memory allocations in a peer
- * context and unregisters any registered allocations.
- *
- * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been
- * enabled from the current device.
- *
- * @param [in] peerCtx
- *
- * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- * @warning PeerToPeer support is experimental.
- */
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx);
-
-// doxygen end Context deprecated
-/**
- * @}
- */
-
-/**
- * @brief Get the state of the primary context.
- *
- * @param [in] Device to get primary context flags for
- * @param [out] Pointer to store flags
- * @param [out] Pointer to store context state; 0 = inactive, 1 = active
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active);
-
-/**
- * @brief Release the primary context on the GPU.
- *
- * @param [in] Device which primary context is released
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- * @warning This function return #hipSuccess though doesn't release the primaryCtx by design on
- * HIP/HCC path.
- */
-hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev);
-
-/**
- * @brief Retain the primary context on the GPU.
- *
- * @param [out] Returned context handle of the new context
- * @param [in] Device which primary context is released
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev);
-
-/**
- * @brief Resets the primary context on the GPU.
- *
- * @param [in] Device which primary context is reset
- *
- * @returns #hipSuccess
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev);
-
-/**
- * @brief Set flags for the primary context.
- *
- * @param [in] Device for which the primary context flags are set
- * @param [in] New flags for the device
- *
- * @returns #hipSuccess, #hipErrorContextAlreadyInUse
- *
- * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
- * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
- */
-hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags);
-
-// doxygen end Context Management
-/**
- * @}
- */
-
-/**
- *
- *  @defgroup Module Module Management
- *  @{
- *  This section describes the module management functions of HIP runtime API.
- *
- */
-
-/**
- * @brief Loads code object from file into a hipModule_t
- *
- * @param [in] fname
- * @param [out] module
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound,
- * hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized
- *
- *
- */
-hipError_t hipModuleLoad(hipModule_t* module, const char* fname);
-
-/**
- * @brief Frees the module
- *
- * @param [in] module
- *
- * @returns hipSuccess, hipInvalidValue
- * module is freed and the code objects associated with it are destroyed
- *
- */
-
-hipError_t hipModuleUnload(hipModule_t module);
-
-/**
- * @brief Function with kname will be extracted if present in module
- *
- * @param [in] module
- * @param [in] kname
- * @param [out] function
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized,
- * hipErrorNotFound,
- */
-hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname);
-
-/**
- * @brief Find out attributes for a given function.
- *
- * @param [out] attr
- * @param [in] func
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDeviceFunction
- */
-
-hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func);
-
-/**
- * @brief Find out a specific attribute for a given function.
- *
- * @param [out] value
- * @param [in]  attrib
- * @param [in]  hfunc
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDeviceFunction
- */
-hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc);
-
-#if !__HIP_ROCclr__
-#if defined(__cplusplus)
-} // extern "C"
-#endif
-
-#ifdef __cplusplus
-namespace hip_impl {
-    class agent_globals_impl;
-    class agent_globals {
-        public:
-            agent_globals();
-            ~agent_globals();
-            agent_globals(const agent_globals&) = delete;
-
-            hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes,
-                    hipModule_t hmod, const char* name);
-            hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
-                    const char* name);
-        private:
-            agent_globals_impl* impl;
-    };
-
-    inline
-    __attribute__((visibility("hidden")))
-    agent_globals& get_agent_globals() {
-        static agent_globals ag;
-        return ag;
-    }
-
-    extern "C"
-    inline
-    __attribute__((visibility("hidden")))
-    hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
-        const char* name) {
-        return get_agent_globals().read_agent_global_from_process(dptr, bytes, name);
-    }
-} // Namespace hip_impl.
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-/**
- * @brief returns device memory pointer and size of the kernel present in the module with symbol @p
- * name
- *
- * @param [out] dptr
- * @param [out] bytes
- * @param [in] hmod
- * @param [in] name
- *
- * @returns hipSuccess, hipErrorInvalidValue, hipErrorNotInitialized
- */
-hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
-                              hipModule_t hmod, const char* name);
-#endif // __HIP_ROCclr__
-
-/**
- * @brief returns the handle of the texture reference with the name from the module.
- *
- * @param [in] hmod
- * @param [in] name
- * @param [out] texRef
- *
- * @returns hipSuccess, hipErrorNotInitialized, hipErrorNotFound, hipErrorInvalidValue
- */
-hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name);
-
-/**
- * @brief builds module from code object which resides in host memory. Image is pointer to that
- * location.
- *
- * @param [in] image
- * @param [out] module
- *
- * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory
- */
-hipError_t hipModuleLoadData(hipModule_t* module, const void* image);
-
-/**
- * @brief builds module from code object which resides in host memory. Image is pointer to that
- * location. Options are not used. hipModuleLoadData is called.
- *
- * @param [in] image
- * @param [out] module
- * @param [in] numOptions number of options
- * @param [in] options options for JIT
- * @param [in] optionValues option values for JIT
- *
- * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory
- */
-hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
-                               hipJitOption* options, void** optionValues);
-
-/**
- * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
- * to kernelparams or extra
- *
- * @param [in] f         Kernel to launch.
- * @param [in] gridDimX  X grid dimension specified as multiple of blockDimX.
- * @param [in] gridDimY  Y grid dimension specified as multiple of blockDimY.
- * @param [in] gridDimZ  Z grid dimension specified as multiple of blockDimZ.
- * @param [in] blockDimX X block dimension specified in work-items
- * @param [in] blockDimY Y block dimension specified in work-items
- * @param [in] blockDimZ Z block dimension specified in work-items
- * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel.  The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the
- * default stream is used with associated synchronization rules.
- * @param [in] kernelParams
- * @param [in] extra     Pointer to kernel arguments.   These are passed directly to the kernel and
- * must be in the memory layout and alignment expected by the kernel.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- * @warning kernelParams argument is not yet implemented in HIP. Please use extra instead. Please
- * refer to hip_porting_driver_api.md for sample usage.
- */
-hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY,
-                                 unsigned int gridDimZ, unsigned int blockDimX,
-                                 unsigned int blockDimY, unsigned int blockDimZ,
-                                 unsigned int sharedMemBytes, hipStream_t stream,
-                                 void** kernelParams, void** extra);
-
-
-#if __HIP_ROCclr__ && !defined(__HCC__)
-/**
- * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
- * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute
- *
- * @param [in] f         Kernel to launch.
- * @param [in] gridDim   Grid dimensions specified as multiple of blockDim.
- * @param [in] blockDim  Block dimensions specified in work-items
- * @param [in] kernelParams A list of kernel arguments
- * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel.  The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge
- */
-hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX,
-                                      void** kernelParams, unsigned int sharedMemBytes,
-                                      hipStream_t stream);
-
-/**
- * @brief Launches kernels on multiple devices where thread blocks can cooperate and
- * synchronize as they execute.
- *
- * @param [in] launchParamsList         List of launch parameters, one per device.
- * @param [in] numDevices               Size of the launchParamsList array.
- * @param [in] flags                    Flags to control launch behavior.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge
- */
-hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
-                                                 int  numDevices, unsigned int  flags);
-
-#endif
-
-/**
- * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched
- * on respective streams before enqueuing any other work on the specified streams from any other threads
- *
- *
- * @param [in] launchParamsList         List of launch parameters, one per device.
- * @param [in] numDevices               Size of the launchParamsList array.
- * @param [in] flags                    Flags to control launch behavior.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- */
-hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
-                                              int  numDevices, unsigned int  flags);
-
-
-// doxygen end Module
-/**
- * @}
- */
-
-/**
- *
- *  @defgroup Occupancy Occupancy
- *  @{
- *  This section describes the occupancy functions of HIP runtime API.
- *
- */
-
-/**
- * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel
- *
- * @param [out] gridSize           minimum grid size for maximum potential occupancy
- * @param [out] blockSize          block size for maximum potential occupancy
- * @param [in]  f                  kernel function for which occupancy is calculated
- * @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue
- */
-
-//TODO - Match CUoccupancyB2DSize
-hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
-                                             hipFunction_t f, size_t dynSharedMemPerBlk,
-                                             int blockSizeLimit);
-
-/**
- * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel
- *
- * @param [out] gridSize           minimum grid size for maximum potential occupancy
- * @param [out] blockSize          block size for maximum potential occupancy
- * @param [in]  f                  kernel function for which occupancy is calculated
- * @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit
- * @param [in]  flags            Extra flags for occupancy calculation (only default supported)
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue
- */
-//TODO - Match CUoccupancyB2DSize
-hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
-                                             hipFunction_t f, size_t dynSharedMemPerBlk,
-                                             int blockSizeLimit, unsigned int  flags);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks        Returned occupancy
- * @param [in]  f                Kernel function (hipFunction_t) for which occupancy is calculated
- * @param [in]  blockSize        Block size the kernel is intended to be launched with
- * @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- */
-hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(
-   int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks        Returned occupancy
- * @param [in]  f                Kernel function (hipFunction_t) for which occupancy is calculated
- * @param [in]  blockSize        Block size the kernel is intended to be launched with
- * @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in]  flags            Extra flags for occupancy calculation (only default supported)
- */
-hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
-   int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks        Returned occupancy
- * @param [in]  f                Kernel function for which occupancy is calculated
- * @param [in]  blockSize        Block size the kernel is intended to be launched with
- * @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- */
-hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
-   int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk);
-
-/**
- * @brief Returns occupancy for a device function.
- *
- * @param [out] numBlocks        Returned occupancy
- * @param [in]  f                Kernel function for which occupancy is calculated
- * @param [in]  blockSize        Block size the kernel is intended to be launched with
- * @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in]  flags            Extra flags for occupancy calculation (currently ignored)
- */
-hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
-   int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault));
-
-/**
- * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel
- *
- * @param [out] gridSize           minimum grid size for maximum potential occupancy
- * @param [out] blockSize          block size for maximum potential occupancy
- * @param [in]  f                  kernel function for which occupancy is calculated
- * @param [in]  dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
- * @param [in]  blockSizeLimit     the maximum block size for the kernel, use 0 for no limit
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue
- */
-hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
-                                             const void* f, size_t dynSharedMemPerBlk,
-                                             int blockSizeLimit);
-
-// doxygen end Occupancy
-/**
- * @}
- */
-
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup Profiler Profiler Control[Deprecated]
- *  @{
- *  This section describes the profiler control functions of HIP runtime API.
- *
- *  @warning The cudaProfilerInitialize API format for "configFile" is not supported.
- *
- */
-
-
-// TODO - expand descriptions:
-/**
- * @brief Start recording of profiling information
- * When using this API, start the profiler with profiling disabled.  (--startdisabled)
- * @warning : hipProfilerStart API is under development.
- */
-DEPRECATED("use roctracer/rocTX instead")
-hipError_t hipProfilerStart();
-
-
-/**
- * @brief Stop recording of profiling information.
- * When using this API, start the profiler with profiling disabled.  (--startdisabled)
- * @warning : hipProfilerStop API is under development.
- */
-DEPRECATED("use roctracer/rocTX instead")
-hipError_t hipProfilerStop();
-
-// doxygen end profiler
-/**
- * @}
- */
-
-/**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- *  @defgroup Clang Launch API to support the triple-chevron syntax
- *  @{
- *  This section describes the API to support the triple-chevron syntax.
- */
-
-/**
- * @brief Configure a kernel launch.
- *
- * @param [in] gridDim   grid dimension specified as multiple of blockDim.
- * @param [in] blockDim  block dimensions specified in work-items
- * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel.  The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), hipStream_t stream __dparm(0));
-
-
-/**
- * @brief Set a kernel argument.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- * @param [in] arg    Pointer to the argument in host memory.
- * @param [in] size   Size of the argument.
- * @param [in] offset Offset of the argument on the argument stack.
- *
- */
-hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset);
-
-
-/**
- * @brief Launch a kernel.
- *
- * @param [in] func Kernel to launch.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-hipError_t hipLaunchByPtr(const void* func);
-
-
-/**
- * @brief Push configuration of a kernel launch.
- *
- * @param [in] gridDim   grid dimension specified as multiple of blockDim.
- * @param [in] blockDim  block dimensions specified in work-items
- * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel.  The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream    Stream where the kernel should be dispatched.  May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-
-hipError_t __hipPushCallConfiguration(dim3 gridDim,
-                                      dim3 blockDim,
-                                      size_t sharedMem __dparm(0),
-                                      hipStream_t stream __dparm(0));
-
-/**
- * @brief Pop configuration of a kernel launch.
- *
- * @param [out] gridDim   grid dimension specified as multiple of blockDim.
- * @param [out] blockDim  block dimensions specified in work-items
- * @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel.  The
- * kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [out] stream    Stream where the kernel should be dispatched.  May be 0, in which case the
- * default stream is used with associated synchronization rules.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- */
-hipError_t __hipPopCallConfiguration(dim3 *gridDim,
-                                     dim3 *blockDim,
-                                     size_t *sharedMem,
-                                     hipStream_t *stream);
-
-/**
- * @brief C compliant kernel launch API
- *
- * @param [in] function_address - kernel stub function pointer.
- * @param [in] numBlocks - number of blocks
- * @param [in] dimBlocks - dimension of a block
- * @param [in] args - kernel arguments
- * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel.  The
- *  Kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream - Stream where the kernel should be dispatched.  May be 0, in which case the
- *  default stream is used with associated synchronization rules.
- *
- * @returns #hipSuccess, #hipErrorInvalidValue, hipInvalidDevice
- *
- */
-hipError_t hipLaunchKernel(const void* function_address,
-                           dim3 numBlocks,
-                           dim3 dimBlocks,
-                           void** args,
-                           size_t sharedMemBytes __dparm(0),
-                           hipStream_t stream __dparm(0));
-
-#if __HIP_ROCclr__ || !defined(__HCC__)
-//TODO: Move this to hip_ext.h
-hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks,
-                              void** args, size_t sharedMemBytes, hipStream_t stream,
-                              hipEvent_t startEvent, hipEvent_t stopEvent, int flags);
-// doxygen end Clang launch
-/**
- * @}
- */
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(
-    size_t* offset,
-    const textureReference* tex,
-    const void* devPtr,
-    const hipChannelFormatDesc* desc,
-    size_t size __dparm(UINT_MAX));
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(
-    size_t* offset,
-    const textureReference* tex,
-    const void* devPtr,
-    const hipChannelFormatDesc* desc,
-    size_t width,
-    size_t height,
-    size_t pitch);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(
-    const textureReference* tex,
-    hipArray_const_t array,
-    const hipChannelFormatDesc* desc);
-
-hipError_t hipBindTextureToMipmappedArray(
-    const textureReference* tex,
-    hipMipmappedArray_const_t mipmappedArray,
-    const hipChannelFormatDesc* desc);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipGetTextureAlignmentOffset(
-    size_t* offset,
-    const textureReference* texref);
-
-hipError_t hipGetTextureReference(
-    const textureReference** texref,
-    const void* symbol);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipUnbindTexture(const textureReference* tex);
-
-hipError_t hipCreateTextureObject(
-    hipTextureObject_t* pTexObject,
-    const hipResourceDesc* pResDesc,
-    const hipTextureDesc* pTexDesc,
-    const struct hipResourceViewDesc* pResViewDesc);
-
-hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
-
-hipError_t hipGetChannelDesc(
-    hipChannelFormatDesc* desc,
-    hipArray_const_t array);
-
-hipError_t hipGetTextureObjectResourceDesc(
-    hipResourceDesc* pResDesc,
-    hipTextureObject_t textureObject);
-
-hipError_t hipGetTextureObjectResourceViewDesc(
-    struct hipResourceViewDesc* pResViewDesc,
-    hipTextureObject_t textureObject);
-
-hipError_t hipGetTextureObjectTextureDesc(
-    hipTextureDesc* pTexDesc,
-    hipTextureObject_t textureObject);
-
-hipError_t hipTexRefGetAddress(
-    hipDeviceptr_t* dev_ptr,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetAddressMode(
-    enum hipTextureAddressMode* pam,
-    const textureReference* texRef,
-    int dim);
-
-hipError_t hipTexRefGetFilterMode(
-    enum hipTextureFilterMode* pfm,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetFlags(
-    unsigned int* pFlags,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetFormat(
-    hipArray_Format* pFormat,
-    int* pNumChannels,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetMaxAnisotropy(
-    int* pmaxAnsio,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetMipmapFilterMode(
-    enum hipTextureFilterMode* pfm,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetMipmapLevelBias(
-    float* pbias,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetMipmapLevelClamp(
-    float* pminMipmapLevelClamp,
-    float* pmaxMipmapLevelClamp,
-    const textureReference* texRef);
-
-hipError_t hipTexRefGetMipMappedArray(
-    hipMipmappedArray_t* pArray,
-    const textureReference* texRef);
-
-hipError_t hipTexRefSetAddress(
-    size_t* ByteOffset,
-    textureReference* texRef,
-    hipDeviceptr_t dptr,
-    size_t bytes);
-
-hipError_t hipTexRefSetAddress2D(
-    textureReference* texRef,
-    const HIP_ARRAY_DESCRIPTOR* desc,
-    hipDeviceptr_t dptr,
-    size_t Pitch);
-
-hipError_t hipTexRefSetAddressMode(
-    textureReference* texRef,
-    int dim,
-    enum hipTextureAddressMode am);
-
-hipError_t hipTexRefSetArray(
-    textureReference* tex,
-    hipArray_const_t array,
-    unsigned int flags);
-
-hipError_t hipTexRefSetBorderColor(
-    textureReference* texRef,
-    float* pBorderColor);
-
-hipError_t hipTexRefSetFilterMode(
-    textureReference* texRef,
-    enum hipTextureFilterMode fm);
-
-hipError_t hipTexRefSetFlags(
-    textureReference* texRef,
-    unsigned int Flags);
-
-hipError_t hipTexRefSetFormat(
-    textureReference* texRef,
-    hipArray_Format fmt,
-    int NumPackedComponents);
-
-hipError_t hipTexRefSetMaxAnisotropy(
-    textureReference* texRef,
-    unsigned int maxAniso);
-
-hipError_t hipTexRefSetMipmapFilterMode(
-    textureReference* texRef,
-    enum hipTextureFilterMode fm);
-
-hipError_t hipTexRefSetMipmapLevelBias(
-    textureReference* texRef,
-    float bias);
-
-hipError_t hipTexRefSetMipmapLevelClamp(
-    textureReference* texRef,
-    float minMipMapLevelClamp,
-    float maxMipMapLevelClamp);
-
-hipError_t hipTexRefSetMipmappedArray(
-    textureReference* texRef,
-    struct hipMipmappedArray* mipmappedArray,
-    unsigned int Flags);
-
-hipError_t hipMipmappedArrayCreate(
-    hipMipmappedArray_t* pHandle,
-    HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc,
-    unsigned int numMipmapLevels);
-
-hipError_t hipMipmappedArrayDestroy(
-    hipMipmappedArray_t hMipmappedArray);
-
-hipError_t hipMipmappedArrayGetLevel(
-    hipArray_t* pLevelArray,
-    hipMipmappedArray_t hMipMappedArray,
-    unsigned int level);
-
-hipError_t hipTexObjectCreate(
-    hipTextureObject_t* pTexObject,
-    const HIP_RESOURCE_DESC* pResDesc,
-    const HIP_TEXTURE_DESC* pTexDesc,
-    const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
-
-hipError_t hipTexObjectDestroy(
-    hipTextureObject_t texObject);
-
-hipError_t hipTexObjectGetResourceDesc(
-    HIP_RESOURCE_DESC* pResDesc,
-    hipTextureObject_t texObject);
-
-hipError_t hipTexObjectGetResourceViewDesc(
-    HIP_RESOURCE_VIEW_DESC* pResViewDesc,
-    hipTextureObject_t texObject);
-
-hipError_t hipTexObjectGetTextureDesc(
-    HIP_TEXTURE_DESC* pTexDesc,
-    hipTextureObject_t texObject);
-#endif
-
-/**
- * @}
- */
-
-
-#ifdef __cplusplus
-} /* extern "c" */
-#endif
-
-#if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
-// Typed convenience overload: reinterprets the kernel handle `f` as a
-// const void* and forwards every argument to the untyped
-// hipOccupancyMaxPotentialBlockSize entry point.
-template <typename T>
-static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(
-    int* gridSize, int* blockSize, T f,
-    size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) {
-    const void* kernelAddress = reinterpret_cast<const void*>(f);
-    return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, kernelAddress,
-                                             dynSharedMemPerBlk, blockSizeLimit);
-}
-
-// Typed convenience overload of the "WithFlags" query. `flags` is accepted
-// for signature compatibility but is not forwarded: the call goes to the
-// flag-less untyped hipOccupancyMaxPotentialBlockSize.
-template <typename T>
-static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(
-    int* gridSize, int* blockSize, T f,
-    size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int  flags = 0 ) {
-    const void* kernelAddress = reinterpret_cast<const void*>(f);
-    return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, kernelAddress,
-                                             dynSharedMemPerBlk, blockSizeLimit);
-}
-#endif  // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
-
-#if defined(__cplusplus) && !defined(__HCC__)
-
-// Typed wrapper: passes the address of `symbol` to the untyped
-// ::hipGetSymbolAddress.
-template <typename T>
-hipError_t hipGetSymbolAddress(void** devPtr, const T &symbol) {
-  const void* symbolAddress = (const void *)&symbol;
-  return ::hipGetSymbolAddress(devPtr, symbolAddress);
-}
-
-// Typed wrapper: passes the address of `symbol` to the untyped
-// ::hipGetSymbolSize.
-template <typename T>
-hipError_t hipGetSymbolSize(size_t* size, const T &symbol) {
-  const void* symbolAddress = (const void *)&symbol;
-  return ::hipGetSymbolSize(size, symbolAddress);
-}
-
-// Typed wrapper: passes the address of `symbol`, plus the remaining
-// arguments unchanged, to the untyped ::hipMemcpyToSymbol.
-template <typename T>
-hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes,
-                             size_t offset __dparm(0),
-                             hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) {
-  const void* symbolAddress = (const void*)&symbol;
-  return ::hipMemcpyToSymbol(symbolAddress, src, sizeBytes, offset, kind);
-}
-
-// Typed wrapper: passes the address of `symbol`, plus the remaining
-// arguments unchanged, to the untyped ::hipMemcpyToSymbolAsync.
-template <typename T>
-hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset,
-                                  hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
-  const void* symbolAddress = (const void*)&symbol;
-  return ::hipMemcpyToSymbolAsync(symbolAddress, src, sizeBytes, offset, kind, stream);
-}
-
-// Typed wrapper: passes the address of `symbol`, plus the remaining
-// arguments unchanged, to the untyped ::hipMemcpyFromSymbol.
-template <typename T>
-hipError_t hipMemcpyFromSymbol(void* dst, const T &symbol,
-                               size_t sizeBytes, size_t offset __dparm(0),
-                               hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
-  const void* symbolAddress = (const void*)&symbol;
-  return ::hipMemcpyFromSymbol(dst, symbolAddress, sizeBytes, offset, kind);
-}
-
-// Typed wrapper: passes the address of `symbol`, plus the remaining
-// arguments unchanged, to the untyped ::hipMemcpyFromSymbolAsync.
-template <typename T>
-hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset,
-                                    hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
-  const void* symbolAddress = (const void*)&symbol;
-  return ::hipMemcpyFromSymbolAsync(dst, symbolAddress, sizeBytes, offset, kind, stream);
-}
-
-#endif
-
-#if USE_PROF_API
-#include <hip/hcc_detail/hip_prof_str.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-/**
- * Callback/Activity API
- */
-hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg);
-hipError_t hipRemoveApiCallback(uint32_t id);
-hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg);
-hipError_t hipRemoveActivityCallback(uint32_t id);
-const char* hipApiName(uint32_t id);
-const char* hipKernelNameRef(const hipFunction_t f);
-const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream);
-int hipGetStreamDeviceId(hipStream_t stream);
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#ifdef __cplusplus
-
-// Typed convenience overload: reinterprets the kernel handle `f` as a
-// const void* and forwards to the untyped
-// hipOccupancyMaxActiveBlocksPerMultiprocessor.
-template <class T>
-inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
-    int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk) {
-    const void* kernelAddress = reinterpret_cast<const void*>(f);
-    return hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, kernelAddress, blockSize,
-                                                        dynSharedMemPerBlk);
-}
-
-// Typed convenience overload: reinterprets the kernel handle `f` as a
-// const void* and forwards, including `flags`, to the untyped
-// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.
-template <class T>
-inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
-    int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) {
-    const void* kernelAddress = reinterpret_cast<const void*>(f);
-    return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, kernelAddress,
-                                                                 blockSize, dynSharedMemPerBlk,
-                                                                 flags);
-}
-
-class TlsData;
-
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
-                          const hipChannelFormatDesc* desc, size_t size = UINT_MAX);
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
-                               const void* devPtr, const struct hipChannelFormatDesc* desc,
-                               size_t size, textureReference* tex);
-#endif
-
-/*
- * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
- *reference tex.
- *
- * @p desc describes how the memory is interpreted when fetching values from the texture. The @p
- *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any
- *memory previously bound to tex is unbound.
- *
- *  @param[in]  offset - Offset in bytes
- *  @param[out]  tex - texture to bind
- *  @param[in]  devPtr - Memory area on device
- *  @param[in]  desc - Channel format
- *  @param[in]  size - Size of the memory area pointed to by devPtr
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- **/
-#if !__HIP_ROCclr__
-// Typed overload taking an explicit channel descriptor. Forwards to
-// ihipBindTextureImpl with a null TlsData pointer, using the template
-// arguments `dim` and `readMode` as the corresponding runtime arguments.
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
-                          const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
-    const struct hipChannelFormatDesc* formatDesc = &desc;
-    return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, formatDesc, size, &tex);
-}
-#endif
-
-/*
- * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
- *reference tex.
- *
- * @p desc describes how the memory is interpreted when fetching values from the texture. The @p
- *offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any
- *memory previously bound to tex is unbound.
- *
- *  @param[in]  offset - Offset in bytes
- *  @param[in]  tex - texture to bind
- *  @param[in]  devPtr - Memory area on device
- *  @param[in]  size - Size of the memory area pointed to by devPtr
- *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
- **/
-#if !__HIP_ROCclr__
-// Typed overload without an explicit channel descriptor: the texture's own
-// `channelDesc` member is used. Forwards to ihipBindTextureImpl with a null
-// TlsData pointer.
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
-                          size_t size = UINT_MAX) {
-    return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr,
-                               &(tex.channelDesc), size, &tex);
-}
-#endif
-
-// C API
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
-                            const hipChannelFormatDesc* desc, size_t width, size_t height,
-                            size_t pitch);
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
-                                 const void* devPtr, const struct hipChannelFormatDesc* desc,
-                                 size_t width, size_t height, textureReference* tex, size_t pitch);
-#endif
-
-#if !__HIP_ROCclr__
-// Typed 2D bind that uses the texture's own `channelDesc` member as the
-// channel format. Forwards to ihipBindTexture2DImpl, using the template
-// arguments `dim` and `readMode` as the corresponding runtime arguments.
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
-                            const void* devPtr, size_t width, size_t height, size_t pitch) {
-    // ihipBindTexture2DImpl declares `pitch` as its final parameter with no
-    // default, so it has to be forwarded for this call to be well-formed.
-    return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height,
-                                 &tex, pitch);
-}
-#endif
-
-#if !__HIP_ROCclr__
-// Typed 2D bind with a caller-supplied channel descriptor. Forwards to
-// ihipBindTexture2DImpl, using the template arguments `dim` and `readMode`
-// as the corresponding runtime arguments.
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
-                            const void* devPtr, const struct hipChannelFormatDesc& desc,
-                            size_t width, size_t height, size_t pitch) {
-    // ihipBindTexture2DImpl declares `pitch` as its final parameter with no
-    // default, so it has to be forwarded for this call to be well-formed.
-    return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex,
-                                 pitch);
-}
-#endif
-
-// C API
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
-                                 const hipChannelFormatDesc* desc);
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode,
-                                      hipArray_const_t array,
-                                      const struct hipChannelFormatDesc& desc,
-                                      textureReference* tex);
-#endif
-
-#if !__HIP_ROCclr__
-// Typed overload that binds `array` using the texture's own `channelDesc`.
-// Forwards to ihipBindTextureToArrayImpl with a null TlsData pointer.
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
-    return ihipBindTextureToArrayImpl(nullptr, dim, readMode,
-                                      array, tex.channelDesc, &tex);
-}
-
-#if !__HIP_ROCclr__
-// Typed overload that binds `array` using the caller-supplied `desc`.
-// Forwards to ihipBindTextureToArrayImpl with a null TlsData pointer.
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array,
-                                 const struct hipChannelFormatDesc& desc) {
-    return ihipBindTextureToArrayImpl(nullptr, dim, readMode,
-                                      array, desc, &tex);
-}
-
-#if !__HIP_ROCclr__
-// Pointer-based overload: `tex` and `desc` are passed by pointer. `desc` is
-// dereferenced unconditionally, so it must not be null. Forwards to
-// ihipBindTextureToArrayImpl with a null TlsData pointer.
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
-                                               hipArray_const_t array,
-                                               const struct hipChannelFormatDesc* desc) {
-    return ihipBindTextureToArrayImpl(nullptr, dim, readMode,
-                                      array, *desc, tex);
-}
-
-// C API
-#if !__HIP_ROCclr__
-hipError_t hipBindTextureToMipmappedArray(const textureReference* tex,
-                                          hipMipmappedArray_const_t mipmappedArray,
-                                          const hipChannelFormatDesc* desc);
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
-                                          hipMipmappedArray_const_t mipmappedArray) {
-    return hipSuccess;
-}
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
-                                          hipMipmappedArray_const_t mipmappedArray,
-                                          const hipChannelFormatDesc& desc) {
-    return hipSuccess;
-}
-#endif
-
-#if __HIP_ROCclr__ && !defined(__HCC__)
-
-template <typename F>
-inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
-                                                    F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) {
-return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize,(hipFunction_t)kernel, dynSharedMemPerBlk, blockSizeLimit);
-}
-
-template <class T>
-inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
-                                             void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
-    return hipLaunchCooperativeKernel(reinterpret_cast<const void*>(f), gridDim,
-                                      blockDim, kernelParams, sharedMemBytes, stream);
-}
-
-template <class T>
-inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
-                                                        unsigned int  numDevices, unsigned int  flags = 0) {
-    return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags);
-}
-
-
-template <class T>
-inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
-                                                     unsigned int  numDevices, unsigned int  flags = 0) {
-    return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags);
-}
-
-#endif
-
-/*
- * @brief Unbinds the textuer bound to @p tex
- *
- *  @param[in]  tex - texture to unbind
- *
- *  @return #hipSuccess
- **/
-#if !__HIP_ROCclr__
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipUnbindTexture(const textureReference* tex);
-#endif
-
-#if !__HIP_ROCclr__
-extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject);
-#endif
-
-#if !__HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
-    return ihipUnbindTextureImpl(tex.textureObject);
-}
-#endif
-
-#if !__HIP_ROCclr__
-hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array);
-
-DEPRECATED(DEPRECATED_MSG)
-hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref);
-
-hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol);
-
-hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc,
-                                  const hipTextureDesc* pTexDesc,
-                                  const hipResourceViewDesc* pResViewDesc);
-
-hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
-
-hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
-                                           hipTextureObject_t textureObject);
-hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc,
-                                               hipTextureObject_t textureObject);
-hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
-                                          hipTextureObject_t textureObject);
-hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags);
-
-hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex);
-
-hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am);
-
-hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim);
-
-hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm);
-
-hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags);
-
-hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents);
-
-hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr,
-                               size_t size);
-
-hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex);
-
-hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
-                                 hipDeviceptr_t devPtr, size_t pitch);
-#endif
-
-hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc);
-
-hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject);
-
-#if __HIP_ROCclr__
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
-                                        const void* devPtr, size_t size = UINT_MAX) {
-    return hipBindTexture(offset, &tex, devPtr, &tex.channelDesc, size);
-}
-
-template <class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t
-    hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex, const void* devPtr,
-                   const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
-    return hipBindTexture(offset, &tex, devPtr, &desc, size);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTexture2D(
-    size_t *offset,
-    const struct texture<T, dim, readMode> &tex,
-    const void *devPtr,
-    size_t width,
-    size_t height,
-    size_t pitch)
-{
-    return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTexture2D(
-  size_t *offset,
-  const struct texture<T, dim, readMode> &tex,
-  const void *devPtr,
-  const struct hipChannelFormatDesc &desc,
-  size_t width,
-  size_t height,
-  size_t pitch)
-{
-  return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTextureToArray(
-    const struct texture<T, dim, readMode> &tex,
-    hipArray_const_t array)
-{
-    struct hipChannelFormatDesc desc;
-    hipError_t err = hipGetChannelDesc(&desc, array);
-    return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, &desc) : err;
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipBindTextureToArray(
-    const struct texture<T, dim, readMode> &tex,
-    hipArray_const_t array,
-    const struct hipChannelFormatDesc &desc)
-{
-    return hipBindTextureToArray(&tex, array, &desc);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-static inline hipError_t hipBindTextureToMipmappedArray(
-    const struct texture<T, dim, readMode> &tex,
-    hipMipmappedArray_const_t mipmappedArray)
-{
-    struct hipChannelFormatDesc desc;
-    hipArray_t levelArray;
-    hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0);
-    if (err != hipSuccess) {
-        return err;
-    }
-    err = hipGetChannelDesc(&desc, levelArray);
-    return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc) : err;
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-static inline hipError_t hipBindTextureToMipmappedArray(
-    const struct texture<T, dim, readMode> &tex,
-    hipMipmappedArray_const_t mipmappedArray,
-    const struct hipChannelFormatDesc &desc)
-{
-    return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc);
-}
-
-template<class T, int dim, enum hipTextureReadMode readMode>
-DEPRECATED(DEPRECATED_MSG)
-static inline hipError_t hipUnbindTexture(
-    const struct texture<T, dim, readMode> &tex)
-{
-    return hipUnbindTexture(&tex);
-}
-#endif
-
-// doxygen end Texture
-/**
- * @}
- */
-
-
-#endif
-
-#ifdef __GNUC__
-#pragma GCC visibility pop
-#endif
-
-// doxygen end HIP API
-/**
- *   @}
- */
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_runtime_prof.h b/third_party/rocm/include/hip/hcc_detail/hip_runtime_prof.h
deleted file mode 100644
index ffd8b0a..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_runtime_prof.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
-Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H
-
-// HIP ROCclr Op IDs enumeration
-enum HipVdiOpId {
-  kHipVdiOpIdDispatch = 0,
-  kHipVdiOpIdCopy     = 1,
-  kHipVdiOpIdBarrier  = 2,
-  kHipVdiOpIdNumber   = 3
-};
-
-// Types of ROCclr commands
-enum HipVdiCommandKind {
-  kHipVdiCommandKernel            = 0x11F0,
-  kHipVdiMemcpyDeviceToHost       = 0x11F3,
-  kHipHipVdiMemcpyHostToDevice    = 0x11F4,
-  kHipVdiMemcpyDeviceToDevice     = 0x11F5,
-  kHipVidMemcpyDeviceToHostRect   = 0x1201,
-  kHipVdiMemcpyHostToDeviceRect   = 0x1202,
-  kHipVdiMemcpyDeviceToDeviceRect = 0x1203,
-  kHipVdiFillMemory               = 0x1207,
-}; 
-
-/**
- * @brief Initializes activity callback
- *
- * @param [input] id_callback Event ID callback function
- * @param [input] op_callback Event operation callback function
- * @param [input] arg         Arguments passed into callback
- *
- * @returns None
- */
-void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg);
-
-/**
- * @brief Enables activity callback
- *
- * @param [input] op      Operation, which will trigger a callback (@see HipVdiOpId)
- * @param [input] enable  Enable state for the callback
- *
- * @returns True if successful
- */
-bool hipEnableActivityCallback(uint32_t op, bool enable);
-
-/**
- * @brief Returns the description string for the operation kind
- *
- * @param [input] id      Command kind id (@see HipVdiCommandKind)
- *
- * @returns A pointer to a const string with the command description
- */
-const char* hipGetCmdName(uint32_t id);
-
-#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_PROF_H
-
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_surface_types.h b/third_party/rocm/include/hip/hcc_detail/hip_surface_types.h
deleted file mode 100644
index f74c01d..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_surface_types.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-Copyright (c) 2015- present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/hip_surface_types.h
- *  @brief Defines surface types for HIP runtime.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H
-
-#include <hip/hcc_detail/driver_types.h>
-
-/**
- * An opaque value that represents a hip surface object
- */
-typedef unsigned long long hipSurfaceObject_t;
-
-/**
- * hip surface reference
- */
-struct surfaceReference {
-    hipSurfaceObject_t surfaceObject;
-};
-
-/**
- * hip surface boundary modes
- */
-enum hipSurfaceBoundaryMode {
-    hipBoundaryModeZero = 0,
-    hipBoundaryModeTrap = 1,
-    hipBoundaryModeClamp = 2
-};
-
-#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_TYPES_H */
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_texture_types.h b/third_party/rocm/include/hip/hcc_detail/hip_texture_types.h
deleted file mode 100644
index a46b236..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_texture_types.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
-Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/hip_texture_types.h
- *  @brief Defines the different newt vector types for HIP runtime.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H
-
-/*******************************************************************************
- *                                                                              *
- *                                                                              *
- *                                                                              *
- *******************************************************************************/
-#include <limits.h>
-//#include <hip/hcc_detail/driver_types.h>
-#include <hip/hcc_detail/channel_descriptor.h>
-#include <hip/hcc_detail/texture_types.h>
-
-#if __cplusplus
-
-/*******************************************************************************
- *                                                                              *
- *                                                                              *
- *                                                                              *
- *******************************************************************************/
-#if __HIP__
-#define __HIP_TEXTURE_ATTRIB __attribute__((device_builtin_texture_type))
-#else
-#define __HIP_TEXTURE_ATTRIB
-#endif
-
-typedef textureReference* hipTexRef;
-
-template <class T, int texType = hipTextureType1D,
-          enum hipTextureReadMode mode = hipReadModeElementType>
-struct __HIP_TEXTURE_ATTRIB texture : public textureReference {
-    texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint,
-            enum hipTextureAddressMode aMode = hipAddressModeClamp) {
-        normalized = norm;
-        readMode = mode;
-        filterMode = fMode;
-        addressMode[0] = aMode;
-        addressMode[1] = aMode;
-        addressMode[2] = aMode;
-        channelDesc = hipCreateChannelDesc<T>();
-        sRGB = 0;
-        textureObject = nullptr;
-        maxAnisotropy = 0;
-        mipmapLevelBias = 0;
-        minMipmapLevelClamp = 0;
-        maxMipmapLevelClamp = 0;
-    }
-
-    texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode,
-            struct hipChannelFormatDesc desc) {
-        normalized = norm;
-        readMode = mode;
-        filterMode = fMode;
-        addressMode[0] = aMode;
-        addressMode[1] = aMode;
-        addressMode[2] = aMode;
-        channelDesc = desc;
-        sRGB = 0;
-        textureObject = nullptr;
-        maxAnisotropy = 0;
-        mipmapLevelBias = 0;
-        minMipmapLevelClamp = 0;
-        maxMipmapLevelClamp = 0;
-    }
-};
-
-#endif /* __cplusplus */
-
-#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H */
diff --git a/third_party/rocm/include/hip/hcc_detail/hip_vector_types.h b/third_party/rocm/include/hip/hcc_detail/hip_vector_types.h
deleted file mode 100644
index 69525c5..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hip_vector_types.h
+++ /dev/null
@@ -1,1593 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/hip_vector_types.h
- *  @brief Defines the different newt vector types for HIP runtime.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_VECTOR_TYPES_H
-
-#if defined(__HCC__) && (__hcc_workweek__ < 16032)
-#error("This version of HIP requires a newer version of HCC.");
-#endif
-
-#include "hip/hcc_detail/host_defines.h"
-
-#if defined(__has_attribute)
-    #if __has_attribute(ext_vector_type)
-        #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
-    #else
-        #define __NATIVE_VECTOR__(n, T) T[n]
-    #endif
-
-#if defined(__cplusplus)
-    #include <array>
-    #include <iosfwd>
-    #include <type_traits>
-
-    namespace hip_impl {
-        template<typename, typename, unsigned int> struct Scalar_accessor;
-    } // Namespace hip_impl.
-
-    namespace std {
-        template<typename T, typename U, unsigned int n>
-        struct is_integral<hip_impl::Scalar_accessor<T, U, n>>
-            : is_integral<T> {};
-        template<typename T, typename U, unsigned int n>
-        struct is_floating_point<hip_impl::Scalar_accessor<T, U, n>>
-            : is_floating_point<T> {};
-    } // Namespace std.
-
-    namespace hip_impl {
-        template<typename T, typename Vector, unsigned int idx>
-        struct Scalar_accessor {
-            struct Address {
-                const Scalar_accessor* p;
-
-                __host__ __device__
-                operator const T*() const noexcept {
-                    return &reinterpret_cast<const T*>(p)[idx];
-                }
-                __host__ __device__
-                operator const T*() const volatile noexcept {
-                    return &reinterpret_cast<const T*>(p)[idx];
-                }
-                __host__ __device__
-                operator T*() noexcept {
-                    return &reinterpret_cast<T*>(
-                        const_cast<Scalar_accessor*>(p))[idx];
-                }
-                __host__ __device__
-                operator T*() volatile noexcept {
-                    return &reinterpret_cast<T*>(
-                        const_cast<Scalar_accessor*>(p))[idx];
-                }
-            };
-
-            friend
-            inline
-            std::ostream& operator<<(std::ostream& os,
-                                     const Scalar_accessor& x) noexcept {
-                return os << x.data[idx];
-            }
-            friend
-            inline
-            std::istream& operator>>(std::istream& is,
-                                     Scalar_accessor& x) noexcept {
-                T tmp;
-                is >> tmp;
-                x.data[idx] = tmp;
-
-                return is;
-            }
-
-            // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
-            Vector data;
-
-            __host__ __device__
-            operator T() const noexcept { return data[idx]; }
-            __host__ __device__
-            operator T() const volatile noexcept { return data[idx]; }
-
-#ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__
-            // The conversions to enum are fairly ghastly, but unfortunately used in
-            // some pre-existing, difficult to modify, code.
-            template<
-                typename U,
-                typename std::enable_if<
-                    !std::is_same<U, T>{} &&
-                    std::is_enum<U>{} &&
-                    std::is_convertible<
-                        T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
-            __host__ __device__
-            operator U() const noexcept { return static_cast<U>(data[idx]); }
-            template<
-                typename U,
-                typename std::enable_if<
-                    !std::is_same<U, T>{} &&
-                    std::is_enum<U>{} &&
-                    std::is_convertible<
-                        T, typename std::enable_if<std::is_enum<U>::value, std::underlying_type<U>>::type::type>{}>::type* = nullptr>
-            __host__ __device__
-            operator U() const volatile noexcept { return static_cast<U>(data[idx]); }
-#endif
-
-            __host__ __device__
-            operator T&() noexcept {
-                return reinterpret_cast<
-                    T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
-            }
-            __host__ __device__
-            operator volatile T&() volatile noexcept {
-                return reinterpret_cast<
-                    volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx];
-            }
-
-            __host__ __device__
-            Address operator&() const noexcept { return Address{this}; }
-
-            __host__ __device__
-            Scalar_accessor& operator=(const Scalar_accessor& x) noexcept {
-                data[idx] = x.data[idx];
-
-                return *this;
-            }
-            __host__ __device__
-            Scalar_accessor& operator=(T x) noexcept {
-                data[idx] = x;
-
-                return *this;
-            }
-            __host__ __device__
-            volatile Scalar_accessor& operator=(T x) volatile noexcept {
-                data[idx] = x;
-
-                return *this;
-            }
-
-            __host__ __device__
-            Scalar_accessor& operator++() noexcept {
-                ++data[idx];
-                return *this;
-            }
-            __host__ __device__
-            T operator++(int) noexcept {
-                auto r{data[idx]};
-                ++data[idx];
-                return *this;
-            }
-            __host__ __device__
-            Scalar_accessor& operator--() noexcept {
-                --data[idx];
-                return *this;
-            }
-            __host__ __device__
-            T operator--(int) noexcept {
-                auto r{data[idx]};
-                --data[idx];
-                return *this;
-            }
-
-            // TODO: convertibility is too restrictive, constraint should be on
-            //       the operator being invocable with a value of type U.
-            template<
-                typename U,
-                typename std::enable_if<
-                    std::is_convertible<U, T>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator+=(U x) noexcept {
-                data[idx] += x;
-                return *this;
-            }
-            template<
-                typename U,
-                typename std::enable_if<
-                    std::is_convertible<U, T>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator-=(U x) noexcept {
-                data[idx] -= x;
-                return *this;
-            }
-
-            template<
-                typename U,
-                typename std::enable_if<
-                    std::is_convertible<U, T>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator*=(U x) noexcept {
-                data[idx] *= x;
-                return *this;
-            }
-            template<
-                typename U,
-                typename std::enable_if<
-                    std::is_convertible<U, T>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator/=(U x) noexcept {
-                data[idx] /= x;
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_convertible<U, T>{} &&
-                                        std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator%=(U x) noexcept {
-                data[idx] %= x;
-                return *this;
-            }
-
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_convertible<U, T>{} &&
-                                        std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator>>=(U x) noexcept {
-                data[idx] >>= x;
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_convertible<U, T>{} &&
-                                        std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator<<=(U x) noexcept {
-                data[idx] <<= x;
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_convertible<U, T>{} &&
-                                        std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator&=(U x) noexcept {
-                data[idx] &= x;
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_convertible<U, T>{} &&
-                                        std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator|=(U x) noexcept {
-                data[idx] |= x;
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_convertible<U, T>{} &&
-                                        std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Scalar_accessor& operator^=(U x) noexcept {
-                data[idx] ^= x;
-                return *this;
-            }
-        };
-
-        inline
-        constexpr
-        unsigned int next_pot(unsigned int x) {
-            // Precondition: x > 1.
-	        return 1u << (32u - __builtin_clz(x - 1u));
-        }
-    } // Namespace hip_impl.
-
-    template<typename T, unsigned int n> struct HIP_vector_base;
-
-    template<typename T>
-    struct HIP_vector_base<T, 1> {
-        using Native_vec_ = __NATIVE_VECTOR__(1, T);
-
-        union {
-            Native_vec_ data;
-#if __HIP_CLANG_ONLY__
-            struct {
-                T x;
-            };
-#else
-            hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
-#endif
-        };
-
-        using value_type = T;
-
-        __host__ __device__
-        HIP_vector_base() = default;
-        __host__ __device__
-        explicit
-        constexpr
-        HIP_vector_base(T x) noexcept : data{x} {}
-        __host__ __device__
-        constexpr
-        HIP_vector_base(const HIP_vector_base&) = default;
-        __host__ __device__
-        constexpr
-        HIP_vector_base(HIP_vector_base&&) = default;
-        __host__ __device__
-        ~HIP_vector_base() = default;
-
-        __host__ __device__
-        HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
-            #if __has_attribute(ext_vector_type)
-                data = x.data;
-            #else
-                data[0] = x.data[0];
-            #endif
-
-            return *this;
-        }
-    };
-
-    template<typename T>
-    struct HIP_vector_base<T, 2> {
-        using Native_vec_ = __NATIVE_VECTOR__(2, T);
-
-        union
-        #if !__has_attribute(ext_vector_type)
-            alignas(hip_impl::next_pot(2 * sizeof(T)))
-        #endif
-        {
-            Native_vec_ data;
-#if __HIP_CLANG_ONLY__
-            struct {
-                T x;
-                T y;
-            };
-#else
-            hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
-            hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
-#endif
-        };
-
-        using value_type = T;
-
-        __host__ __device__
-        HIP_vector_base() = default;
-        __host__ __device__
-        explicit
-        constexpr
-        HIP_vector_base(T x) noexcept : data{x, x} {}
-        __host__ __device__
-        constexpr
-        HIP_vector_base(T x, T y) noexcept : data{x, y} {}
-        __host__ __device__
-        constexpr
-        HIP_vector_base(const HIP_vector_base&) = default;
-        __host__ __device__
-        constexpr
-        HIP_vector_base(HIP_vector_base&&) = default;
-        __host__ __device__
-        ~HIP_vector_base() = default;
-
-        __host__ __device__
-        HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
-            #if __has_attribute(ext_vector_type)
-                data = x.data;
-            #else
-                data[0] = x.data[0];
-                data[1] = x.data[1];
-            #endif
-
-            return *this;
-        }
-    };
-
-    template<typename T>
-    struct HIP_vector_base<T, 3> {
-        struct Native_vec_ {
-            T d[3];
-
-            __host__ __device__
-            Native_vec_() = default;
-
-            __host__ __device__
-            explicit
-            constexpr
-            Native_vec_(T x) noexcept : d{x, x, x} {}
-            __host__ __device__
-            constexpr
-            Native_vec_(T x, T y, T z) noexcept : d{x, y, z} {}
-            __host__ __device__
-            constexpr
-            Native_vec_(const Native_vec_&) = default;
-            __host__ __device__
-            constexpr
-            Native_vec_(Native_vec_&&) = default;
-            __host__ __device__
-            ~Native_vec_() = default;
-
-            __host__ __device__
-            Native_vec_& operator=(const Native_vec_&) = default;
-            __host__ __device__
-            Native_vec_& operator=(Native_vec_&&) = default;
-
-            __host__ __device__
-            T& operator[](unsigned int idx) noexcept { return d[idx]; }
-            __host__ __device__
-            T operator[](unsigned int idx) const noexcept { return d[idx]; }
-
-            __host__ __device__
-            Native_vec_& operator+=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] += x.d[i];
-                return *this;
-            }
-            __host__ __device__
-            Native_vec_& operator-=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] -= x.d[i];
-                return *this;
-            }
-
-            __host__ __device__
-            Native_vec_& operator*=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] *= x.d[i];
-                return *this;
-            }
-            __host__ __device__
-            Native_vec_& operator/=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] /= x.d[i];
-                return *this;
-            }
-
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_ operator-() const noexcept
-            {
-                auto r{*this};
-                for (auto&& x : r.d) x = -x;
-                return r;
-            }
-
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_ operator~() const noexcept
-            {
-                auto r{*this};
-                for (auto&& x : r.d) x = ~x;
-                return r;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_& operator%=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] %= x.d[i];
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_& operator^=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] ^= x.d[i];
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_& operator|=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] |= x.d[i];
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_& operator&=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] &= x.d[i];
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_& operator>>=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] >>= x.d[i];
-                return *this;
-            }
-            template<
-                typename U = T,
-                typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-            __host__ __device__
-            Native_vec_& operator<<=(const Native_vec_& x) noexcept
-            {
-                for (auto i = 0u; i != 3u; ++i) d[i] <<= x.d[i];
-                return *this;
-            }
-
-            using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
-            __host__ __device__
-            Vec3_cmp operator==(const Native_vec_& x) const noexcept
-            {
-                return Vec3_cmp{d[0] == x.d[0], d[1] == x.d[1], d[2] == x.d[2]};
-            }
-        };
-
-        union {
-            Native_vec_ data;
-            struct {
-                T x;
-                T y;
-                T z;
-            };
-        };
-
-        using value_type = T;
-
-        __host__ __device__
-        HIP_vector_base() = default;
-        __host__ __device__
-        explicit
-        constexpr
-        HIP_vector_base(T x) noexcept : data{x, x, x} {}
-        __host__ __device__
-        constexpr
-        HIP_vector_base(T x, T y, T z) noexcept : data{x, y, z} {}
-        __host__ __device__
-        constexpr
-        HIP_vector_base(const HIP_vector_base&) = default;
-        __host__ __device__
-        constexpr
-        HIP_vector_base(HIP_vector_base&&) = default;
-        __host__ __device__
-        ~HIP_vector_base() = default;
-
-        __host__ __device__
-        HIP_vector_base& operator=(const HIP_vector_base&) = default;
-        __host__ __device__
-        HIP_vector_base& operator=(HIP_vector_base&&) = default;
-    };
-
-    template<typename T>
-    struct HIP_vector_base<T, 4> {
-        using Native_vec_ = __NATIVE_VECTOR__(4, T);
-
-        union
-        #if !__has_attribute(ext_vector_type)
-            alignas(hip_impl::next_pot(4 * sizeof(T)))
-        #endif
-        {
-            Native_vec_ data;
-#if __HIP_CLANG_ONLY__
-            struct {
-                T x;
-                T y;
-                T z;
-                T w;
-            };
-#else
-            hip_impl::Scalar_accessor<T, Native_vec_, 0> x;
-            hip_impl::Scalar_accessor<T, Native_vec_, 1> y;
-            hip_impl::Scalar_accessor<T, Native_vec_, 2> z;
-            hip_impl::Scalar_accessor<T, Native_vec_, 3> w;
-#endif
-        };
-
-        using value_type = T;
-
-        __host__ __device__
-        HIP_vector_base() = default;
-        __host__ __device__
-        explicit
-        constexpr
-        HIP_vector_base(T x) noexcept : data{x, x, x, x} {}
-        __host__ __device__
-        constexpr
-        HIP_vector_base(T x, T y, T z, T w) noexcept : data{x, y, z, w} {}
-        __host__ __device__
-        constexpr
-        HIP_vector_base(const HIP_vector_base&) = default;
-        __host__ __device__
-        constexpr
-        HIP_vector_base(HIP_vector_base&&) = default;
-        __host__ __device__
-        ~HIP_vector_base() = default;
-
-        __host__ __device__
-        HIP_vector_base& operator=(const HIP_vector_base& x) noexcept {
-            #if __has_attribute(ext_vector_type)
-                data = x.data;
-            #else
-                data[0] = x.data[0];
-                data[1] = x.data[1];
-                data[2] = x.data[2];
-                data[3] = x.data[3];
-            #endif
-
-            return *this;
-        }
-    };
-
-    template<typename T, unsigned int rank>
-    struct HIP_vector_type : public HIP_vector_base<T, rank> {
-        using HIP_vector_base<T, rank>::data;
-        using typename HIP_vector_base<T, rank>::Native_vec_;
-
-        __host__ __device__
-        HIP_vector_type() = default;
-        template<
-            typename U,
-            typename std::enable_if<
-                std::is_convertible<U, T>{}>::type* = nullptr>
-        __host__ __device__
-        explicit
-        constexpr
-        HIP_vector_type(U x) noexcept
-            : HIP_vector_base<T, rank>{static_cast<T>(x)}
-        {}
-        template< // TODO: constrain based on type as well.
-            typename... Us,
-            typename std::enable_if<
-                (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
-        __host__ __device__
-        constexpr
-        HIP_vector_type(Us... xs) noexcept
-            : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
-        {}
-        __host__ __device__
-        constexpr
-        HIP_vector_type(const HIP_vector_type&) = default;
-        __host__ __device__
-        constexpr
-        HIP_vector_type(HIP_vector_type&&) = default;
-        __host__ __device__
-        ~HIP_vector_type() = default;
-
-        __host__ __device__
-        HIP_vector_type& operator=(const HIP_vector_type&) = default;
-        __host__ __device__
-        HIP_vector_type& operator=(HIP_vector_type&&) = default;
-
-        // Operators
-        __host__ __device__
-        HIP_vector_type& operator++() noexcept
-        {
-            return *this += HIP_vector_type{1};
-        }
-        __host__ __device__
-        HIP_vector_type operator++(int) noexcept
-        {
-            auto tmp(*this);
-            ++*this;
-            return tmp;
-        }
-
-        __host__ __device__
-        HIP_vector_type& operator--() noexcept
-        {
-            return *this -= HIP_vector_type{1};
-        }
-        __host__ __device__
-        HIP_vector_type operator--(int) noexcept
-        {
-            auto tmp(*this);
-            --*this;
-            return tmp;
-        }
-
-        __host__ __device__
-        HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
-        {
-            data += x.data;
-            return *this;
-        }
-        template<
-            typename U,
-            typename std::enable_if<
-                std::is_convertible<U, T>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator+=(U x) noexcept
-        {
-            return *this += HIP_vector_type{x};
-        }
-
-        __host__ __device__
-        HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
-        {
-            data -= x.data;
-            return *this;
-        }
-        template<
-            typename U,
-            typename std::enable_if<
-                std::is_convertible<U, T>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator-=(U x) noexcept
-        {
-            return *this -= HIP_vector_type{x};
-        }
-
-        __host__ __device__
-        HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
-        {
-            data *= x.data;
-            return *this;
-        }
-        template<
-            typename U,
-            typename std::enable_if<
-                std::is_convertible<U, T>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator*=(U x) noexcept
-        {
-            return *this *= HIP_vector_type{x};
-        }
-
-        __host__ __device__
-        HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
-        {
-            data /= x.data;
-            return *this;
-        }
-        template<
-            typename U,
-            typename std::enable_if<
-                std::is_convertible<U, T>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator/=(U x) noexcept
-        {
-            return *this /= HIP_vector_type{x};
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type operator-() const noexcept
-        {
-            auto tmp(*this);
-            tmp.data = -tmp.data;
-            return tmp;
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type operator~() const noexcept
-        {
-            HIP_vector_type r{*this};
-            r.data = ~r.data;
-            return r;
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
-        {
-            data %= x.data;
-            return *this;
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
-        {
-            data ^= x.data;
-            return *this;
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
-        {
-            data |= x.data;
-            return *this;
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
-        {
-            data &= x.data;
-            return *this;
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
-        {
-            data >>= x.data;
-            return *this;
-        }
-
-        template<
-            typename U = T,
-            typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
-        __host__ __device__
-        HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
-        {
-            data <<= x.data;
-            return *this;
-        }
-    };
-
-    template<typename T, unsigned int n>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator+(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} += y;
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator+(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator+(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} += y;
-    }
-
-    template<typename T, unsigned int n>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator-(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} -= y;
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator-(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator-(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} -= y;
-    }
-
-    template<typename T, unsigned int n>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator*(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} *= y;
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator*(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator*(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} *= y;
-    }
-
-    template<typename T, unsigned int n>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator/(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} /= y;
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator/(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator/(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} /= y;
-    }
-
-    template<typename V>
-    __host__ __device__
-    inline
-    constexpr
-    bool _hip_any_zero(const V& x, int n) noexcept
-    {
-        return
-            (n == -1) ? true : ((x[n] == 0) ? false : _hip_any_zero(x, n - 1));
-    }
-
-    template<typename T, unsigned int n>
-    __host__ __device__
-    inline
-    constexpr
-    bool operator==(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return _hip_any_zero(x.data == y.data, n - 1);
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return x == HIP_vector_type<T, n>{y};
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} == y;
-    }
-
-    template<typename T, unsigned int n>
-    __host__ __device__
-    inline
-    constexpr
-    bool operator!=(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return !(x == y);
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return !(x == y);
-    }
-    template<typename T, unsigned int n, typename U>
-    __host__ __device__
-    inline
-    constexpr
-    bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return !(x == y);
-    }
-
-    template<
-        typename T,
-        unsigned int n,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator%(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} %= y;
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator%(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator%(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} %= y;
-    }
-
-    template<
-        typename T,
-        unsigned int n,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator^(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} ^= y;
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator^(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator^(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} ^= y;
-    }
-
-    template<
-        typename T,
-        unsigned int n,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator|(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} |= y;
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator|(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator|(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} |= y;
-    }
-
-    template<
-        typename T,
-        unsigned int n,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator&(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} &= y;
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator&(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator&(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} &= y;
-    }
-
-    template<
-        typename T,
-        unsigned int n,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator>>(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} >>= y;
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator>>(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator>>(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} >>= y;
-    }
-
-    template<
-        typename T,
-        unsigned int n,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator<<(
-        const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} <<= y;
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator<<(
-        const HIP_vector_type<T, n>& x, U y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
-    }
-    template<
-        typename T,
-        unsigned int n,
-        typename U,
-        typename std::enable_if<std::is_arithmetic<U>::value>::type,
-        typename std::enable_if<std::is_integral<T>{}>* = nullptr>
-    __host__ __device__
-    inline
-    constexpr
-    HIP_vector_type<T, n> operator<<(
-        U x, const HIP_vector_type<T, n>& y) noexcept
-    {
-        return HIP_vector_type<T, n>{x} <<= y;
-    }
-
-    #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
-        using CUDA_name##1 = HIP_vector_type<T, 1>;\
-        using CUDA_name##2 = HIP_vector_type<T, 2>;\
-        using CUDA_name##3 = HIP_vector_type<T, 3>;\
-        using CUDA_name##4 = HIP_vector_type<T, 4>;
-#else
-    #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
-        typedef struct {\
-            T x;\
-        } CUDA_name##1;\
-        typedef struct {\
-            T x;\
-            T y;\
-        } CUDA_name##2;\
-        typedef struct {\
-            T x;\
-            T y;\
-            T z;\
-        } CUDA_name##3;\
-        typedef struct {\
-            T x;\
-            T y;\
-            T z;\
-            T w;\
-        } CUDA_name##4;
-#endif
-
-__MAKE_VECTOR_TYPE__(uchar, unsigned char);
-__MAKE_VECTOR_TYPE__(char, char);
-__MAKE_VECTOR_TYPE__(ushort, unsigned short);
-__MAKE_VECTOR_TYPE__(short, short);
-__MAKE_VECTOR_TYPE__(uint, unsigned int);
-__MAKE_VECTOR_TYPE__(int, int);
-__MAKE_VECTOR_TYPE__(ulong, unsigned long);
-__MAKE_VECTOR_TYPE__(long, long);
-__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
-__MAKE_VECTOR_TYPE__(longlong, long long);
-__MAKE_VECTOR_TYPE__(float, float);
-__MAKE_VECTOR_TYPE__(double, double);
-
-#ifdef __cplusplus
-#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
-    static inline __device__ __host__ \
-    type make_##type(comp x) { type r{x}; return r; }
-
-#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
-    static inline __device__ __host__ \
-    type make_##type(comp x, comp y) { type r{x, y}; return r; }
-
-#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
-    static inline __device__ __host__ \
-    type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; }
-
-#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
-    static inline __device__ __host__ \
-    type make_##type(comp x, comp y, comp z, comp w) { \
-        type r{x, y, z, w}; \
-        return r; \
-    }
-#else
- #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
-     static inline __device__ __host__ \
-     type make_##type(comp x) { type r; r.x =x; return r; }
-
- #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
-     static inline __device__ __host__ \
-     type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; }
-
- #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
-     static inline __device__ __host__ \
-     type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; }
-
- #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
-     static inline __device__ __host__ \
-     type make_##type(comp x, comp y, comp z, comp w) { \
-         type r; r.x=x; r.y=y; r.z=z; r.w=w; \
-         return r; \
-     }
-#endif
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
-DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
-DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
-DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
-DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
-DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
-DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
-DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
-DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
-DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
-
-DECLOP_MAKE_ONE_COMPONENT(float, float1);
-DECLOP_MAKE_TWO_COMPONENT(float, float2);
-DECLOP_MAKE_THREE_COMPONENT(float, float3);
-DECLOP_MAKE_FOUR_COMPONENT(float, float4);
-
-DECLOP_MAKE_ONE_COMPONENT(double, double1);
-DECLOP_MAKE_TWO_COMPONENT(double, double2);
-DECLOP_MAKE_THREE_COMPONENT(double, double3);
-DECLOP_MAKE_FOUR_COMPONENT(double, double4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
-DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
-DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
-DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
-
-DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
-DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
-DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
-DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
-
-DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
-DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
-DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
-DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
-#else // !defined(__has_attribute)
-
-#if defined(_MSC_VER)
-#include <mmintrin.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-#include <immintrin.h>
-
-typedef union { char data; } char1;
-typedef union { char data[2]; } char2;
-typedef union { char data[4]; } char4;
-typedef union { char4 data; } char3;
-typedef union { __m64 data; } char8;
-typedef union { __m128i data; } char16;
-
-typedef union { unsigned char data; } uchar1;
-typedef union { unsigned char data[2]; } uchar2;
-typedef union { unsigned char data[4]; } uchar4;
-typedef union { uchar4 data; } uchar3;
-typedef union { __m64 data; } uchar8;
-typedef union { __m128i data; } uchar16;
-
-typedef union { short data; } short1;
-typedef union { short data[2]; } short2;
-typedef union { __m64 data; } short4;
-typedef union { short4 data; } short3;
-typedef union { __m128i data; } short8;
-typedef union { __m128i data[2]; } short16;
-
-typedef union { unsigned short data; } ushort1;
-typedef union { unsigned short data[2]; } ushort2;
-typedef union { __m64 data; } ushort4;
-typedef union { ushort4 data; } ushort3;
-typedef union { __m128i data; } ushort8;
-typedef union { __m128i data[2]; } ushort16;
-
-typedef union { int data; } int1;
-typedef union { __m64 data; } int2;
-typedef union { __m128i data; } int4;
-typedef union { int4 data; } int3;
-typedef union { __m128i data[2]; } int8;
-typedef union { __m128i data[4];} int16;
-
-typedef union { unsigned int data; } uint1;
-typedef union { __m64 data; } uint2;
-typedef union { __m128i data; } uint4;
-typedef union { uint4 data; } uint3;
-typedef union { __m128i data[2]; } uint8;
-typedef union { __m128i data[4]; } uint16;
-
-#if !defined(_WIN64)
-typedef union { int data; } long1;
-typedef union { __m64 data; } long2;
-typedef union { __m128i data; } long4;
-typedef union { long4 data; } long3;
-typedef union { __m128i data[2]; } long8;
-typedef union { __m128i data[4]; } long16;
-
-typedef union { unsigned int data; } ulong1;
-typedef union { __m64 data; } ulong2;
-typedef union { __m128i data; } ulong4;
-typedef union { ulong4 data; } ulong3;
-typedef union { __m128i data[2]; } ulong8;
-typedef union { __m128i data[4]; } ulong16;
-#else // defined(_WIN64)
-typedef union { __m64 data; } long1;
-typedef union { __m128i data; } long2;
-typedef union { __m128i data[2]; } long4;
-typedef union { long4 data; } long3;
-typedef union { __m128i data[4]; } long8;
-typedef union { __m128i data[8]; } long16;
-
-typedef union { __m64 data; } ulong1;
-typedef union { __m128i data; } ulong2;
-typedef union { __m128i data[2]; } ulong4;
-typedef union { ulong4 data; } ulong3;
-typedef union { __m128i data[4]; } ulong8;
-typedef union { __m128i data[8]; } ulong16;
-#endif // defined(_WIN64)
-
-typedef union { __m64 data; } longlong1;
-typedef union { __m128i data; } longlong2;
-typedef union { __m128i data[2]; } longlong4;
-typedef union { longlong4 data; } longlong3;
-typedef union { __m128i data[4]; } longlong8;
-typedef union { __m128i data[8]; } longlong16;
-
-typedef union { __m64 data; } ulonglong1;
-typedef union { __m128i data; } ulonglong2;
-typedef union { __m128i data[2]; } ulonglong4;
-typedef union { ulonglong4 data; } ulonglong3;
-typedef union { __m128i data[4]; } ulonglong8;
-typedef union { __m128i data[8]; } ulonglong16;
-
-typedef union { float data; } float1;
-typedef union { __m64 data; } float2;
-typedef union { __m128 data; } float4;
-typedef union { float4 data; } float3;
-typedef union { __m256 data; } float8;
-typedef union { __m256 data[2]; } float16;
-
-typedef union { double data; } double1;
-typedef union { __m128d data; } double2;
-typedef union { __m256d data; } double4;
-typedef union { double4 data; } double3;
-typedef union { __m256d data[2]; } double8;
-typedef union { __m256d data[4]; } double16;
-
-#else // !defined(_MSC_VER)
-
-typedef union { char data; } char1;
-typedef union { char data[2]; } char2;
-typedef union { char data[4]; } char4;
-typedef union { char data[8]; } char8;
-typedef union { char data[16]; } char16;
-typedef union { char4 data; } char3;
-
-typedef union { unsigned char data; } uchar1;
-typedef union { unsigned char data[2]; } uchar2;
-typedef union { unsigned char data[4]; } uchar4;
-typedef union { unsigned char data[8]; } uchar8;
-typedef union { unsigned char data[16]; } uchar16;
-typedef union { uchar4 data; } uchar3;
-
-typedef union { short data; } short1;
-typedef union { short data[2]; } short2;
-typedef union { short data[4]; } short4;
-typedef union { short data[8]; } short8;
-typedef union { short data[16]; } short16;
-typedef union { short4 data; } short3;
-
-typedef union { unsigned short data; } ushort1;
-typedef union { unsigned short data[2]; } ushort2;
-typedef union { unsigned short data[4]; } ushort4;
-typedef union { unsigned short data[8]; } ushort8;
-typedef union { unsigned short data[16]; } ushort16;
-typedef union { ushort4 data; } ushort3;
-
-typedef union { int data; } int1;
-typedef union { int data[2]; } int2;
-typedef union { int data[4]; } int4;
-typedef union { int data[8]; } int8;
-typedef union { int data[16]; } int16;
-typedef union { int4 data; } int3;
-
-typedef union { unsigned int data; } uint1;
-typedef union { unsigned int data[2]; } uint2;
-typedef union { unsigned int data[4]; } uint4;
-typedef union { unsigned int data[8]; } uint8;
-typedef union { unsigned int data[16]; } uint16;
-typedef union { uint4 data; } uint3;
-
-typedef union { long data; } long1;
-typedef union { long data[2]; } long2;
-typedef union { long data[4]; } long4;
-typedef union { long data[8]; } long8;
-typedef union { long data[16]; } long16;
-typedef union { long4 data; } long3;
-
-typedef union { unsigned long data; } ulong1;
-typedef union { unsigned long data[2]; } ulong2;
-typedef union { unsigned long data[4]; } ulong4;
-typedef union { unsigned long data[8]; } ulong8;
-typedef union { unsigned long data[16]; } ulong16;
-typedef union { ulong4 data; } ulong3;
-
-typedef union { long long data; } longlong1;
-typedef union { long long data[2]; } longlong2;
-typedef union { long long data[4]; } longlong4;
-typedef union { long long data[8]; } longlong8;
-typedef union { long long data[16]; } longlong16;
-typedef union { longlong4 data; } longlong3;
-
-typedef union { unsigned long long data; } ulonglong1;
-typedef union { unsigned long long data[2]; } ulonglong2;
-typedef union { unsigned long long data[4]; } ulonglong4;
-typedef union { unsigned long long data[8]; } ulonglong8;
-typedef union { unsigned long long data[16]; } ulonglong16;
-typedef union { ulonglong4 data; } ulonglong3;
-
-typedef union { float data; } float1;
-typedef union { float data[2]; } float2;
-typedef union { float data[4]; } float4;
-typedef union { float data[8]; } float8;
-typedef union { float data[16]; } float16;
-typedef union { float4 data; } float3;
-
-typedef union { double data; } double1;
-typedef union { double data[2]; } double2;
-typedef union { double data[4]; } double4;
-typedef union { double data[8]; } double8;
-typedef union { double data[16]; } double16;
-typedef union { double4 data; } double3;
-
-#endif // defined(_MSC_VER)
-#endif // defined(__has_attribute)
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/hiprtc.h b/third_party/rocm/include/hip/hcc_detail/hiprtc.h
deleted file mode 100644
index fecea75..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hiprtc.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#ifndef HIPRTC_H
-#define HIPRTC_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#include <stdlib.h>
-
-#if !defined(_WIN32)
-#pragma GCC visibility push (default)
-#endif
-
-enum hiprtcResult {
-    HIPRTC_SUCCESS = 0,
-    HIPRTC_ERROR_OUT_OF_MEMORY = 1,
-    HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
-    HIPRTC_ERROR_INVALID_INPUT = 3,
-    HIPRTC_ERROR_INVALID_PROGRAM = 4,
-    HIPRTC_ERROR_INVALID_OPTION = 5,
-    HIPRTC_ERROR_COMPILATION = 6,
-    HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
-    HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
-    HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
-    HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
-    HIPRTC_ERROR_INTERNAL_ERROR = 11
-};
-
-const char* hiprtcGetErrorString(hiprtcResult result);
-
-
-hiprtcResult hiprtcVersion(int* major, int* minor);
-
-typedef struct _hiprtcProgram* hiprtcProgram;
-
-hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog,
-                                     const char* name_expression);
-
-hiprtcResult hiprtcCompileProgram(hiprtcProgram prog,
-                                  int numOptions,
-                                  const char** options);
-
-hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog,
-                                 const char* src,
-                                 const char* name,
-                                 int numHeaders,
-                                 const char** headers,
-                                 const char** includeNames);
-
-hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog);
-
-hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog,
-                                  const char* name_expression,
-                                  const char** lowered_name);
-
-hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* log);
-
-hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog,
-                                     size_t* logSizeRet);
-
-hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code);
-
-hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet);
-
-#if !defined(_WIN32)
-#pragma GCC visibility pop
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif //HIPRTC_H
diff --git a/third_party/rocm/include/hip/hcc_detail/host_defines.h b/third_party/rocm/include/hip/hcc_detail/host_defines.h
deleted file mode 100644
index 72f3932..0000000
--- a/third_party/rocm/include/hip/hcc_detail/host_defines.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/host_defines.h
- *  @brief TODO-doc
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H
-
-
-// Add guard to Generic Grid Launch method
-#ifndef GENERIC_GRID_LAUNCH
-#define GENERIC_GRID_LAUNCH 1
-#endif
-
-#ifdef __HCC__
-/**
- * Function and kernel markers
- */
-#define __host__ __attribute__((cpu))
-#define __device__ __attribute__((hc))
-
-#if GENERIC_GRID_LAUNCH == 0
-#define __global__ __attribute__((hc_grid_launch)) __attribute__((used))
-#else
-#if __hcc_workweek__ >= 17481
-#define __global__ __attribute__((annotate("__HIP_global_function__"), cpu, hc, used))
-#else
-#define __global__ __attribute__((hc, used))
-#endif
-#endif  // GENERIC_GRID_LAUNCH
-
-#define __noinline__ __attribute__((noinline))
-#define __forceinline__ inline __attribute__((always_inline))
-
-
-/*
- * Variable Type Qualifiers:
- */
-// _restrict is supported by the compiler
-#define __shared__ tile_static
-#define __constant__ __attribute__((hc, annotate("__HIP_constant__")))
-
-#elif defined(__clang__) && defined(__HIP__)
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-#define __host__ __attribute__((host))
-#define __device__ __attribute__((device))
-#define __global__ __attribute__((global))
-#define __shared__ __attribute__((shared))
-#define __constant__ __attribute__((constant))
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-#define __noinline__ __attribute__((noinline))
-#define __forceinline__ inline __attribute__((always_inline))
-
-#else
-
-// Non-HCC compiler
-/**
- * Function and kernel markers
- */
-#define __host__
-#define __device__
-
-#define __global__
-
-#define __noinline__
-#define __forceinline__ inline
-
-#define __shared__
-#define __constant__
-
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/hsa_helpers.hpp b/third_party/rocm/include/hip/hcc_detail/hsa_helpers.hpp
deleted file mode 100644
index af4f0c9..0000000
--- a/third_party/rocm/include/hip/hcc_detail/hsa_helpers.hpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#pragma once
-
-#include <hsa/hsa.h>
-
-#include <cstdint>
-#include <functional>
-#include <string>
-
-namespace hip_impl {
-inline void* address(hsa_executable_symbol_t x) {
-    void* r = nullptr;
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r);
-
-    return r;
-}
-
-inline hsa_agent_t agent(hsa_executable_symbol_t x) {
-    hsa_agent_t r = {};
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &r);
-
-    return r;
-}
-
-inline std::uint32_t group_size(hsa_executable_symbol_t x) {
-    std::uint32_t r = 0u;
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &r);
-
-    return r;
-}
-
-inline hsa_isa_t isa(hsa_agent_t x) {
-    hsa_isa_t r = {};
-    hsa_agent_iterate_isas(x,
-                           [](hsa_isa_t i, void* o) {
-                               *static_cast<hsa_isa_t*>(o) = i;  // Pick the first.
-
-                               return HSA_STATUS_INFO_BREAK;
-                           },
-                           &r);
-
-    return r;
-}
-
-inline std::uint64_t kernel_object(hsa_executable_symbol_t x) {
-    std::uint64_t r = 0u;
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &r);
-
-    return r;
-}
-
-inline std::string name(hsa_executable_symbol_t x) {
-    std::uint32_t sz = 0u;
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz);
-
-    std::string r(sz, '\0');
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front());
-
-    return r;
-}
-
-inline std::uint32_t private_size(hsa_executable_symbol_t x) {
-    std::uint32_t r = 0u;
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &r);
-
-    return r;
-}
-
-inline std::uint32_t size(hsa_executable_symbol_t x) {
-    std::uint32_t r = 0;
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r);
-
-    return r;
-}
-
-inline hsa_symbol_kind_t type(hsa_executable_symbol_t x) {
-    hsa_symbol_kind_t r = {};
-    hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &r);
-
-    return r;
-}
-}  // namespace hip_impl
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/hcc_detail/library_types.h b/third_party/rocm/include/hip/hcc_detail/library_types.h
deleted file mode 100644
index 6fcd0dc..0000000
--- a/third_party/rocm/include/hip/hcc_detail/library_types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LIBRARY_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_LIBRARY_TYPES_H
-
-typedef enum hipDataType {
-  HIP_R_16F = 2,
-  HIP_R_32F = 0,
-  HIP_R_64F = 1,
-  HIP_C_16F = 6,
-  HIP_C_32F = 4,
-  HIP_C_64F = 5
-} hipDataType;
-
-typedef enum hipLibraryPropertyType {
-  HIP_LIBRARY_MAJOR_VERSION,
-  HIP_LIBRARY_MINOR_VERSION,
-  HIP_LIBRARY_PATCH_LEVEL
-} hipLibraryPropertyType;
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/llvm_intrinsics.h b/third_party/rocm/include/hip/hcc_detail/llvm_intrinsics.h
deleted file mode 100644
index 330b3d9..0000000
--- a/third_party/rocm/include/hip/hcc_detail/llvm_intrinsics.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hcc_detail/llvm_intrinsics.h
- *  @brief Contains declarations for wrapper functions for llvm intrinsics
- *         like llvm.amdgcn.s.barrier.
- */
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H
-
-#include "hip/hcc_detail/host_defines.h"
-
-// FIXME: These should all be removed and proper builtins used.
-__device__
-unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize");
-
-__device__
-int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle");
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/macro_based_grid_launch.hpp b/third_party/rocm/include/hip/hcc_detail/macro_based_grid_launch.hpp
deleted file mode 100644
index 96d449b..0000000
--- a/third_party/rocm/include/hip/hcc_detail/macro_based_grid_launch.hpp
+++ /dev/null
@@ -1,798 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "concepts.hpp"
-#include "helpers.hpp"
-
-#include "hc.hpp"
-#include "hip/hip_ext.h"
-#include "hip_runtime.h"
-
-#include <functional>
-#include <iostream>
-#include <stdexcept>
-#include <type_traits>
-#include <utility>
-
-namespace hip_impl {
-namespace {
-struct New_grid_launch_tag {};
-struct Old_grid_launch_tag {};
-
-template <typename C, typename D>
-class RAII_guard {
-    D dtor_;
-
-   public:
-    RAII_guard() = default;
-
-    RAII_guard(const C& ctor, D dtor) : dtor_{std::move(dtor)} { ctor(); }
-
-    RAII_guard(const RAII_guard&) = default;
-    RAII_guard(RAII_guard&&) = default;
-
-    RAII_guard& operator=(const RAII_guard&) = default;
-    RAII_guard& operator=(RAII_guard&&) = default;
-
-    ~RAII_guard() { dtor_(); }
-};
-
-template <typename C, typename D>
-RAII_guard<C, D> make_RAII_guard(const C& ctor, D dtor) {
-    return RAII_guard<C, D>{ctor, std::move(dtor)};
-}
-
-template <FunctionalProcedure F, typename... Ts>
-using is_new_grid_launch_t = typename std::conditional<is_callable<F(Ts...)>{}, New_grid_launch_tag,
-                                                       Old_grid_launch_tag>::type;
-}  // namespace
-
-// TODO: - dispatch rank should be derived from the domain dimensions passed
-//         in, and not always assumed to be 3;
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> ==
-         {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, dim3 num_blocks,
-                                                    dim3 dim_blocks, int group_mem_bytes,
-                                                    const hc::accelerator_view& acc_v, K k) {
-    const auto d =
-        hc::extent<3>{num_blocks.z * dim_blocks.z, num_blocks.y * dim_blocks.y,
-                      num_blocks.x * dim_blocks.x}
-            .tile_with_dynamic(dim_blocks.z, dim_blocks.y, dim_blocks.x, group_mem_bytes);
-
-    try {
-        hc::parallel_for_each(acc_v, d, k);
-    } catch (std::exception& ex) {
-        std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl;
-        hip_throw(ex);
-    }
-}
-
-// TODO: these are workarounds, they should be removed.
-
-hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&);
-void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t);
-void unlock_stream_hip_(hipStream_t, void*, const char*, hc::accelerator_view*);
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag,
-                                                                 dim3 num_blocks, dim3 dim_blocks,
-                                                                 int group_mem_bytes,
-                                                                 hipStream_t stream,
-                                                                 const char* kernel_name, K k) {
-    void* lck_stream = nullptr;
-    auto acc_v = lock_stream_hip_(stream, lck_stream);
-    auto stream_guard =
-        make_RAII_guard(std::bind(print_prelaunch_trace_, kernel_name, num_blocks, dim_blocks,
-                                  group_mem_bytes, stream),
-                        std::bind(unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v));
-
-    try {
-        grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks),
-                              group_mem_bytes, acc_v, std::move(k));
-    } catch (std::exception& ex) {
-        std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl;
-        hip_throw(ex);
-    }
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> ==
-         {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_(Old_grid_launch_tag,
-                                                                   dim3 num_blocks, dim3 dim_blocks,
-                                                                   int group_mem_bytes,
-                                                                   hipStream_t stream, K k) {
-    grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks),
-                          group_mem_bytes, std::move(stream), std::move(k));
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_(
-    Old_grid_launch_tag, dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, hipStream_t stream,
-    const char* kernel_name, K k) {
-    grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks),
-                          group_mem_bytes, std::move(stream), kernel_name, std::move(k));
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {Ts...}) inline std::enable_if_t<
-    !std::is_function<K>::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks,
-                                                  int group_mem_bytes, hipStream_t stream,
-                                                  const char* kernel_name, K k) {
-    grid_launch_hip_impl_(is_new_grid_launch_t<K, Ts...>{}, std::move(num_blocks),
-                          std::move(dim_blocks), group_mem_bytes, std::move(stream), kernel_name,
-                          std::move(k));
-}
-
-template <FunctionalProcedure K, typename... Ts>
-requires(Domain<K> == {Ts...}) inline std::enable_if_t<
-    !std::is_function<K>::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks,
-                                                  int group_mem_bytes, hipStream_t stream, K k) {
-    grid_launch_hip_impl_(is_new_grid_launch_t<K, Ts...>{}, std::move(num_blocks),
-                          std::move(dim_blocks), group_mem_bytes, std::move(stream), std::move(k));
-}
-
-// TODO: these are temporary and purposefully noisy and disruptive.
-#define make_kernel_name_hip(k, n)                                                                 \
-    HIP_kernel_functor_name_begin##_##k##_##HIP_kernel_functor_name_end##_##n
-
-#define make_kernel_functor_hip_30(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
-                                   p22, p23, p24, p25, p26, p27)                                   \
-    struct make_kernel_name_hip(function_name, 28) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        std::decay_t<decltype(p21)> _p21_;                                                         \
-        std::decay_t<decltype(p22)> _p22_;                                                         \
-        std::decay_t<decltype(p23)> _p23_;                                                         \
-        std::decay_t<decltype(p24)> _p24_;                                                         \
-        std::decay_t<decltype(p25)> _p25_;                                                         \
-        std::decay_t<decltype(p26)> _p26_;                                                         \
-        std::decay_t<decltype(p27)> _p27_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_,      \
-                        _p22_, _p23_, _p24_, _p25_, _p26_, _p27_);                                 \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_29(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
-                                   p22, p23, p24, p25, p26)                                        \
-    struct make_kernel_name_hip(function_name, 27) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        std::decay_t<decltype(p21)> _p21_;                                                         \
-        std::decay_t<decltype(p22)> _p22_;                                                         \
-        std::decay_t<decltype(p23)> _p23_;                                                         \
-        std::decay_t<decltype(p24)> _p24_;                                                         \
-        std::decay_t<decltype(p25)> _p25_;                                                         \
-        std::decay_t<decltype(p26)> _p26_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_,      \
-                        _p22_, _p23_, _p24_, _p25_, _p26_);                                        \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_28(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
-                                   p22, p23, p24, p25)                                             \
-    struct make_kernel_name_hip(function_name, 26) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        std::decay_t<decltype(p21)> _p21_;                                                         \
-        std::decay_t<decltype(p22)> _p22_;                                                         \
-        std::decay_t<decltype(p23)> _p23_;                                                         \
-        std::decay_t<decltype(p24)> _p24_;                                                         \
-        std::decay_t<decltype(p25)> _p25_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_,      \
-                        _p22_, _p23_, _p24_, _p25_);                                               \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_27(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
-                                   p22, p23, p24)                                                  \
-    struct make_kernel_name_hip(function_name, 25) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        std::decay_t<decltype(p21)> _p21_;                                                         \
-        std::decay_t<decltype(p22)> _p22_;                                                         \
-        std::decay_t<decltype(p23)> _p23_;                                                         \
-        std::decay_t<decltype(p24)> _p24_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_,      \
-                        _p22_, _p23_, _p24_);                                                      \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_26(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
-                                   p22, p23)                                                       \
-    struct make_kernel_name_hip(function_name, 24) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        std::decay_t<decltype(p21)> _p21_;                                                         \
-        std::decay_t<decltype(p22)> _p22_;                                                         \
-        std::decay_t<decltype(p23)> _p23_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_,      \
-                        _p22_, _p23_);                                                             \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_25(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \
-                                   p22)                                                            \
-    struct make_kernel_name_hip(function_name, 23) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        std::decay_t<decltype(p21)> _p21_;                                                         \
-        std::decay_t<decltype(p22)> _p22_;                                                         \
-        __attribute__((used, flatten)) void operator()(const hc::tiled_index<3>&) const [[hc]] {   \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_,      \
-                        _p22_);                                                                    \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_24(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21) \
-    struct make_kernel_name_hip(function_name, 22) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        std::decay_t<decltype(p21)> _p21_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_);     \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_23(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20)      \
-    struct make_kernel_name_hip(function_name, 21) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        std::decay_t<decltype(p20)> _p20_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_);            \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_22(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19)           \
-    struct make_kernel_name_hip(function_name, 20) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        std::decay_t<decltype(p19)> _p19_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_);                   \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_21(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17, p18)                \
-    struct make_kernel_name_hip(function_name, 19) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        std::decay_t<decltype(p18)> _p18_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_);                          \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_20(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16, p17)                     \
-    struct make_kernel_name_hip(function_name, 18) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        std::decay_t<decltype(p17)> _p17_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_, _p17_);                                 \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_19(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15, p16)                          \
-    struct make_kernel_name_hip(function_name, 17) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        std::decay_t<decltype(p16)> _p16_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_, _p16_);                                        \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_18(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14, p15)                               \
-    struct make_kernel_name_hip(function_name, 16) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        std::decay_t<decltype(p15)> _p15_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_, _p15_);                                               \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_17(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13, p14)                                    \
-    struct make_kernel_name_hip(function_name, 15) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        std::decay_t<decltype(p14)> _p14_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_, _p14_);                                                      \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_16(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12, p13)                                         \
-    struct make_kernel_name_hip(function_name, 14) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        std::decay_t<decltype(p13)> _p13_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_, _p13_);                                                             \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_15(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11, p12)                                              \
-    struct make_kernel_name_hip(function_name, 13) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        std::decay_t<decltype(p12)> _p12_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_,  \
-                        _p12_);                                                                    \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_14(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10, p11)                                                   \
-    struct make_kernel_name_hip(function_name, 12) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        std::decay_t<decltype(p11)> _p11_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_); \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_13(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9, p10)                                                        \
-    struct make_kernel_name_hip(function_name, 11) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        std::decay_t<decltype(p10)> _p10_;                                                         \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] {                                  \
-            kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_);        \
-        }                                                                                          \
-    }
-#define make_kernel_functor_hip_12(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \
-                                   p9)                                                             \
-    struct make_kernel_name_hip(function_name, 10) {                                               \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        std::decay_t<decltype(p9)> _p9_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const                                           \
-            [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_); }    \
-    }
-#define make_kernel_functor_hip_11(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
-    struct make_kernel_name_hip(function_name, 9) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        std::decay_t<decltype(p8)> _p8_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const                                           \
-            [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_); }          \
-    }
-#define make_kernel_functor_hip_10(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7)     \
-    struct make_kernel_name_hip(function_name, 8) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        std::decay_t<decltype(p7)> _p7_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const                                           \
-            [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_); }                \
-    }
-#define make_kernel_functor_hip_9(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6)          \
-    struct make_kernel_name_hip(function_name, 7) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        std::decay_t<decltype(p6)> _p6_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const                                           \
-            [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_); }                      \
-    }
-#define make_kernel_functor_hip_8(function_name, kernel_name, p0, p1, p2, p3, p4, p5)              \
-    struct make_kernel_name_hip(function_name, 6) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        std::decay_t<decltype(p5)> _p5_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const                                           \
-            [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_); }                            \
-    }
-#define make_kernel_functor_hip_7(function_name, kernel_name, p0, p1, p2, p3, p4)                  \
-    struct make_kernel_name_hip(function_name, 5) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        std::decay_t<decltype(p4)> _p4_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const                                           \
-            [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_); }                                  \
-    }
-#define make_kernel_functor_hip_6(function_name, kernel_name, p0, p1, p2, p3)                      \
-    struct make_kernel_name_hip(function_name, 4) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        std::decay_t<decltype(p3)> _p3_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const                                           \
-            [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_); }                                        \
-    }
-#define make_kernel_functor_hip_5(function_name, kernel_name, p0, p1, p2)                          \
-    struct make_kernel_name_hip(function_name, 3) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        std::decay_t<decltype(p2)> _p2_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_, _p2_); } \
-    }
-#define make_kernel_functor_hip_4(function_name, kernel_name, p0, p1)                              \
-    struct make_kernel_name_hip(function_name, 2) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        std::decay_t<decltype(p1)> _p1_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_); }       \
-    }
-#define fofo(f, n) kernel_prefix_hip##f##kernel_suffix_hip##n
-#define make_kernel_functor_hip_3(function_name, kernel_name, p0)                                  \
-    struct make_kernel_name_hip(function_name, 1) {                                                \
-        std::decay_t<decltype(p0)> _p0_;                                                           \
-        void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_); }             \
-    }
-#define make_kernel_functor_hip_2(function_name, kernel_name)                                      \
-    struct make_kernel_name_hip(function_name, 0) {                                                \
-        void operator()(const hc::tiled_index<3>&)[[hc]] { return kernel_name(hipLaunchParm{}); }  \
-    }
-#define make_kernel_functor_hip_1(...)
-#define make_kernel_functor_hip_0(...)
-#define make_kernel_functor_hip_(...) overload_macro_hip_(make_kernel_functor_hip_, __VA_ARGS__)
-
-
-#define hipLaunchNamedKernelGGL(function_name, kernel_name, num_blocks, dim_blocks,                \
-                                group_mem_bytes, stream, ...)                                      \
-    do {                                                                                           \
-        make_kernel_functor_hip_(function_name, kernel_name, __VA_ARGS__)                          \
-            hip_kernel_functor_impl_{__VA_ARGS__};                                                 \
-        hip_impl::grid_launch_hip_(num_blocks, dim_blocks, group_mem_bytes, stream, #kernel_name,  \
-                                   hip_kernel_functor_impl_);                                      \
-    } while (0)
-
-#define hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...)      \
-    do {                                                                                           \
-        hipLaunchNamedKernelGGL(unnamed, kernel_name, num_blocks, dim_blocks, group_mem_bytes,     \
-                                stream, ##__VA_ARGS__);                                            \
-    } while (0)
-
-#define hipLaunchKernel(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...)         \
-    do {                                                                                           \
-        hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream,           \
-                           hipLaunchParm{}, ##__VA_ARGS__);                                        \
-    } while (0)
-}  // namespace hip_impl
diff --git a/third_party/rocm/include/hip/hcc_detail/math_functions.h b/third_party/rocm/include/hip/hcc_detail/math_functions.h
deleted file mode 100644
index 3dbc9a2..0000000
--- a/third_party/rocm/include/hip/hcc_detail/math_functions.h
+++ /dev/null
@@ -1,1557 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "hip_fp16_math_fwd.h"
-#include "hip_vector_types.h"
-#include "math_fwd.h"
-
-#include <hip/hcc_detail/host_defines.h>
-
-#include <algorithm>
-
-// assert.h is only for the host version of assert.
-// The device version of assert is implemented in hip/hcc_detail/hip_runtime.h.
-// Users should include hip_runtime.h for the device version of assert.
-#if !__HIP_DEVICE_COMPILE__
-#include <assert.h>
-#endif
-
-#include <limits.h>
-#include <limits>
-#include <stdint.h>
-
-// HCC's own math functions should be included first, otherwise there will
-// be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
-#ifdef __HCC__
-#include "kalmar_math.h"
-#endif
-
-#if _LIBCPP_VERSION && __HIP__
-namespace std {
-template <>
-struct __numeric_type<_Float16>
-{
-   static _Float16 __test(_Float16);
-
-   typedef _Float16 type;
-   static const bool value = true;
-};
-}
-#endif // _LIBCPP_VERSION
-
-#pragma push_macro("__DEVICE__")
-#pragma push_macro("__RETURN_TYPE")
-
-#ifdef __HCC__
-#define __DEVICE__ __device__
-#define __RETURN_TYPE int
-#else // to be consistent with __clang_cuda_math_forward_declares
-#define __DEVICE__ static __device__
-#define __RETURN_TYPE bool
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-__DEVICE__
-inline
-uint64_t __make_mantissa_base8(const char* tagp)
-{
-    uint64_t r = 0;
-    while (tagp) {
-        char tmp = *tagp;
-
-        if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
-        else return 0;
-
-        ++tagp;
-    }
-
-    return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa_base10(const char* tagp)
-{
-    uint64_t r = 0;
-    while (tagp) {
-        char tmp = *tagp;
-
-        if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
-        else return 0;
-
-        ++tagp;
-    }
-
-    return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa_base16(const char* tagp)
-{
-    uint64_t r = 0;
-    while (tagp) {
-        char tmp = *tagp;
-
-        if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
-        else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
-        else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
-        else return 0;
-
-        ++tagp;
-    }
-
-    return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa(const char* tagp)
-{
-    if (!tagp) return 0u;
-
-    if (*tagp == '0') {
-        ++tagp;
-
-        if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
-        else return __make_mantissa_base8(tagp);
-    }
-
-    return __make_mantissa_base10(tagp);
-}
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-// DOT FUNCTIONS
-#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
-__DEVICE__
-inline
-int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
-    return __ockl_sdot2(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
-    return __ockl_udot2(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
-    return __ockl_sdot4(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
-    return __ockl_udot4(a.data, b.data, c, saturate);
-}
-__DEVICE__
-inline
-int amd_mixed_dot(int a, int b, int c, bool saturate) {
-    return __ockl_sdot8(a, b, c, saturate);
-}
-__DEVICE__
-inline
-uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
-    return __ockl_udot8(a, b, c, saturate);
-}
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-// BEGIN FLOAT
-__DEVICE__
-inline
-float abs(float x) { return __ocml_fabs_f32(x); }
-__DEVICE__
-inline
-float acosf(float x) { return __ocml_acos_f32(x); }
-__DEVICE__
-inline
-float acoshf(float x) { return __ocml_acosh_f32(x); }
-__DEVICE__
-inline
-float asinf(float x) { return __ocml_asin_f32(x); }
-__DEVICE__
-inline
-float asinhf(float x) { return __ocml_asinh_f32(x); }
-__DEVICE__
-inline
-float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
-__DEVICE__
-inline
-float atanf(float x) { return __ocml_atan_f32(x); }
-__DEVICE__
-inline
-float atanhf(float x) { return __ocml_atanh_f32(x); }
-__DEVICE__
-inline
-float cbrtf(float x) { return __ocml_cbrt_f32(x); }
-__DEVICE__
-inline
-float ceilf(float x) { return __ocml_ceil_f32(x); }
-__DEVICE__
-inline
-float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
-__DEVICE__
-inline
-float cosf(float x) { return __ocml_cos_f32(x); }
-__DEVICE__
-inline
-float coshf(float x) { return __ocml_cosh_f32(x); }
-__DEVICE__
-inline
-float cospif(float x) { return __ocml_cospi_f32(x); }
-__DEVICE__
-inline
-float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
-__DEVICE__
-inline
-float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
-__DEVICE__
-inline
-float erfcf(float x) { return __ocml_erfc_f32(x); }
-__DEVICE__
-inline
-float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
-__DEVICE__
-inline
-float erfcxf(float x) { return __ocml_erfcx_f32(x); }
-__DEVICE__
-inline
-float erff(float x) { return __ocml_erf_f32(x); }
-__DEVICE__
-inline
-float erfinvf(float x) { return __ocml_erfinv_f32(x); }
-__DEVICE__
-inline
-float exp10f(float x) { return __ocml_exp10_f32(x); }
-__DEVICE__
-inline
-float exp2f(float x) { return __ocml_exp2_f32(x); }
-__DEVICE__
-inline
-float expf(float x) { return __ocml_exp_f32(x); }
-__DEVICE__
-inline
-float expm1f(float x) { return __ocml_expm1_f32(x); }
-__DEVICE__
-inline
-float fabsf(float x) { return __ocml_fabs_f32(x); }
-__DEVICE__
-inline
-float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
-__DEVICE__
-inline
-float fdividef(float x, float y) { return x / y; }
-__DEVICE__
-inline
-float floorf(float x) { return __ocml_floor_f32(x); }
-__DEVICE__
-inline
-float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
-__DEVICE__
-inline
-float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
-__DEVICE__
-inline
-float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
-__DEVICE__
-inline
-float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
-__DEVICE__
-inline
-float frexpf(float x, int* nptr)
-{
-    int tmp;
-    float r =
-        __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
-    *nptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
-__DEVICE__
-inline
-int ilogbf(float x) { return __ocml_ilogb_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
-__DEVICE__
-inline
-float j0f(float x) { return __ocml_j0_f32(x); }
-__DEVICE__
-inline
-float j1f(float x) { return __ocml_j1_f32(x); }
-__DEVICE__
-inline
-float jnf(int n, float x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case.
-    if (n == 0) return j0f(x);
-    if (n == 1) return j1f(x);
-
-    float x0 = j0f(x);
-    float x1 = j1f(x);
-    for (int i = 1; i < n; ++i) {
-        float x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-__DEVICE__
-inline
-float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
-__DEVICE__
-inline
-float lgammaf(float x) { return __ocml_lgamma_f32(x); }
-__DEVICE__
-inline
-long long int llrintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-long long int llroundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float log10f(float x) { return __ocml_log10_f32(x); }
-__DEVICE__
-inline
-float log1pf(float x) { return __ocml_log1p_f32(x); }
-__DEVICE__
-inline
-float log2f(float x) { return __ocml_log2_f32(x); }
-__DEVICE__
-inline
-float logbf(float x) { return __ocml_logb_f32(x); }
-__DEVICE__
-inline
-float logf(float x) { return __ocml_log_f32(x); }
-__DEVICE__
-inline
-long int lrintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-long int lroundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float modff(float x, float* iptr)
-{
-    float tmp;
-    float r =
-        __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
-    *iptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-float nanf(const char* tagp)
-{
-    union {
-        float val;
-        struct ieee_float {
-            uint32_t mantissa : 22;
-            uint32_t quiet : 1;
-            uint32_t exponent : 8;
-            uint32_t sign : 1;
-        } bits;
-
-        static_assert(sizeof(float) == sizeof(ieee_float), "");
-    } tmp;
-
-    tmp.bits.sign = 0u;
-    tmp.bits.exponent = ~0u;
-    tmp.bits.quiet = 1u;
-    tmp.bits.mantissa = __make_mantissa(tagp);
-
-    return tmp.val;
-}
-__DEVICE__
-inline
-float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
-__DEVICE__
-inline
-float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
-__DEVICE__
-inline
-float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
-__DEVICE__
-inline
-float norm4df(float x, float y, float z, float w)
-{
-    return __ocml_len4_f32(x, y, z, w);
-}
-__DEVICE__
-inline
-float normcdff(float x) { return __ocml_ncdf_f32(x); }
-__DEVICE__
-inline
-float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
-__DEVICE__
-inline
-float normf(int dim, const float* a)
-{   // TODO: placeholder until OCML adds support.
-    float r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_sqrt_f32(r);
-}
-__DEVICE__
-inline
-float powf(float x, float y) { return __ocml_pow_f32(x, y); }
-__DEVICE__
-inline
-float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); }
-__DEVICE__
-inline
-float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
-__DEVICE__
-inline
-float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
-__DEVICE__
-inline
-float remquof(float x, float y, int* quo)
-{
-    int tmp;
-    float r =
-        __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
-    *quo = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
-__DEVICE__
-inline
-float rintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-float rnorm3df(float x, float y, float z)
-{
-    return __ocml_rlen3_f32(x, y, z);
-}
-
-__DEVICE__
-inline
-float rnorm4df(float x, float y, float z, float w)
-{
-    return __ocml_rlen4_f32(x, y, z, w);
-}
-__DEVICE__
-inline
-float rnormf(int dim, const float* a)
-{   // TODO: placeholder until OCML adds support.
-    float r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_rsqrt_f32(r);
-}
-__DEVICE__
-inline
-float roundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
-__DEVICE__
-inline
-float scalblnf(float x, long int n)
-{
-    return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
-}
-__DEVICE__
-inline
-float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
-__DEVICE__
-inline
-__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
-__DEVICE__
-inline
-void sincosf(float x, float* sptr, float* cptr)
-{
-    float tmp;
-
-    *sptr =
-        __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-void sincospif(float x, float* sptr, float* cptr)
-{
-    float tmp;
-
-    *sptr =
-        __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-float sinf(float x) { return __ocml_sin_f32(x); }
-__DEVICE__
-inline
-float sinhf(float x) { return __ocml_sinh_f32(x); }
-__DEVICE__
-inline
-float sinpif(float x) { return __ocml_sinpi_f32(x); }
-__DEVICE__
-inline
-float sqrtf(float x) { return __ocml_sqrt_f32(x); }
-__DEVICE__
-inline
-float tanf(float x) { return __ocml_tan_f32(x); }
-__DEVICE__
-inline
-float tanhf(float x) { return __ocml_tanh_f32(x); }
-__DEVICE__
-inline
-float tgammaf(float x) { return __ocml_tgamma_f32(x); }
-__DEVICE__
-inline
-float truncf(float x) { return __ocml_trunc_f32(x); }
-__DEVICE__
-inline
-float y0f(float x) { return __ocml_y0_f32(x); }
-__DEVICE__
-inline
-float y1f(float x) { return __ocml_y1_f32(x); }
-__DEVICE__
-inline
-float ynf(int n, float x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case. Placeholder until OCML adds
-    //       support.
-    if (n == 0) return y0f(x);
-    if (n == 1) return y1f(x);
-
-    float x0 = y0f(x);
-    float x1 = y1f(x);
-    for (int i = 1; i < n; ++i) {
-        float x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-
-// BEGIN INTRINSICS
-__DEVICE__
-inline
-float __cosf(float x) { return __ocml_native_cos_f32(x); }
-__DEVICE__
-inline
-float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
-__DEVICE__
-inline
-float __expf(float x) { return __ocml_native_exp_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fadd_rn(float x, float y) { return x + y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
-__DEVICE__
-inline
-float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fdiv_rn(float x, float y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fdividef(float x, float y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmaf_rd(float x, float y, float z)
-{
-    return __ocml_fma_rtn_f32(x, y, z);
-}
-#endif
-__DEVICE__
-inline
-float __fmaf_rn(float x, float y, float z)
-{
-    return __ocml_fma_f32(x, y, z);
-}
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmaf_ru(float x, float y, float z)
-{
-    return __ocml_fma_rtp_f32(x, y, z);
-}
-__DEVICE__
-inline
-float __fmaf_rz(float x, float y, float z)
-{
-   return __ocml_fma_rtz_f32(x, y, z);
-}
-__DEVICE__
-inline
-float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fmul_rn(float x, float y) { return x * y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmul_ru(float x, float y)  { return __ocml_mul_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
-__DEVICE__
-inline
-float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); }
-#endif
-__DEVICE__
-inline
-float __frcp_rn(float x) { return __llvm_amdgcn_rcp_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __frcp_ru(float x) { return __llvm_amdgcn_rcp_f32(x); }
-__DEVICE__
-inline
-float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); }
-#endif
-__DEVICE__
-inline
-float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
-#endif
-__DEVICE__
-inline
-float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
-__DEVICE__
-inline
-float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
-__DEVICE__
-inline
-float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fsub_rn(float x, float y) { return x - y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __log10f(float x) { return __ocml_native_log10_f32(x); }
-__DEVICE__
-inline
-float __log2f(float x) { return __ocml_native_log2_f32(x); }
-__DEVICE__
-inline
-float __logf(float x) { return __ocml_native_log_f32(x); }
-__DEVICE__
-inline
-float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
-__DEVICE__
-inline
-float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
-__DEVICE__
-inline
-void __sincosf(float x, float* sptr, float* cptr)
-{
-    *sptr = __ocml_native_sin_f32(x);
-    *cptr = __ocml_native_cos_f32(x);
-}
-__DEVICE__
-inline
-float __sinf(float x) { return __ocml_native_sin_f32(x); }
-__DEVICE__
-inline
-float __tanf(float x) { return __ocml_tan_f32(x); }
-// END INTRINSICS
-// END FLOAT
-
-// BEGIN DOUBLE
-__DEVICE__
-inline
-double abs(double x) { return __ocml_fabs_f64(x); }
-__DEVICE__
-inline
-double acos(double x) { return __ocml_acos_f64(x); }
-__DEVICE__
-inline
-double acosh(double x) { return __ocml_acosh_f64(x); }
-__DEVICE__
-inline
-double asin(double x) { return __ocml_asin_f64(x); }
-__DEVICE__
-inline
-double asinh(double x) { return __ocml_asinh_f64(x); }
-__DEVICE__
-inline
-double atan(double x) { return __ocml_atan_f64(x); }
-__DEVICE__
-inline
-double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
-__DEVICE__
-inline
-double atanh(double x) { return __ocml_atanh_f64(x); }
-__DEVICE__
-inline
-double cbrt(double x) { return __ocml_cbrt_f64(x); }
-__DEVICE__
-inline
-double ceil(double x) { return __ocml_ceil_f64(x); }
-__DEVICE__
-inline
-double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
-__DEVICE__
-inline
-double cos(double x)  { return __ocml_cos_f64(x); }
-__DEVICE__
-inline
-double cosh(double x) { return __ocml_cosh_f64(x); }
-__DEVICE__
-inline
-double cospi(double x) { return __ocml_cospi_f64(x); }
-__DEVICE__
-inline
-double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
-__DEVICE__
-inline
-double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
-__DEVICE__
-inline
-double erf(double x) { return __ocml_erf_f64(x); }
-__DEVICE__
-inline
-double erfc(double x) { return __ocml_erfc_f64(x); }
-__DEVICE__
-inline
-double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
-__DEVICE__
-inline
-double erfcx(double x) { return __ocml_erfcx_f64(x); }
-__DEVICE__
-inline
-double erfinv(double x) { return __ocml_erfinv_f64(x); }
-__DEVICE__
-inline
-double exp(double x) { return __ocml_exp_f64(x); }
-__DEVICE__
-inline
-double exp10(double x) { return __ocml_exp10_f64(x); }
-__DEVICE__
-inline
-double exp2(double x) { return __ocml_exp2_f64(x); }
-__DEVICE__
-inline
-double expm1(double x) { return __ocml_expm1_f64(x); }
-__DEVICE__
-inline
-double fabs(double x) { return __ocml_fabs_f64(x); }
-__DEVICE__
-inline
-double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
-__DEVICE__
-inline
-double floor(double x) { return __ocml_floor_f64(x); }
-__DEVICE__
-inline
-double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
-__DEVICE__
-inline
-double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
-__DEVICE__
-inline
-double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
-__DEVICE__
-inline
-double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
-__DEVICE__
-inline
-double frexp(double x, int* nptr)
-{
-    int tmp;
-    double r =
-        __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
-    *nptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
-__DEVICE__
-inline
-int ilogb(double x) { return __ocml_ilogb_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
-__DEVICE__
-inline
-double j0(double x) { return __ocml_j0_f64(x); }
-__DEVICE__
-inline
-double j1(double x) { return __ocml_j1_f64(x); }
-__DEVICE__
-inline
-double jn(int n, double x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case. Placeholder until OCML adds
-    //       support.
-    if (n == 0) return j0f(x);
-    if (n == 1) return j1f(x);
-
-    double x0 = j0f(x);
-    double x1 = j1f(x);
-    for (int i = 1; i < n; ++i) {
-        double x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-__DEVICE__
-inline
-double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
-__DEVICE__
-inline
-double lgamma(double x) { return __ocml_lgamma_f64(x); }
-__DEVICE__
-inline
-long long int llrint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-long long int llround(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double log(double x) { return __ocml_log_f64(x); }
-__DEVICE__
-inline
-double log10(double x) { return __ocml_log10_f64(x); }
-__DEVICE__
-inline
-double log1p(double x) { return __ocml_log1p_f64(x); }
-__DEVICE__
-inline
-double log2(double x) { return __ocml_log2_f64(x); }
-__DEVICE__
-inline
-double logb(double x) { return __ocml_logb_f64(x); }
-__DEVICE__
-inline
-long int lrint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-long int lround(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double modf(double x, double* iptr)
-{
-    double tmp;
-    double r =
-        __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
-    *iptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-double nan(const char* tagp)
-{
-#if !_WIN32
-    union {
-        double val;
-        struct ieee_double {
-            uint64_t mantissa : 51;
-            uint32_t quiet : 1;
-            uint32_t exponent : 11;
-            uint32_t sign : 1;
-        }  bits;
-        static_assert(sizeof(double) == sizeof(ieee_double), "");
-    } tmp;
-
-    tmp.bits.sign = 0u;
-    tmp.bits.exponent = ~0u;
-    tmp.bits.quiet = 1u;
-    tmp.bits.mantissa = __make_mantissa(tagp);
-
-    return tmp.val;
-#else
-    static_assert(sizeof(uint64_t)==sizeof(double));
-    uint64_t val = __make_mantissa(tagp);
-    val |= 0xFFF << 51;
-    return *reinterpret_cast<double*>(&val);
-#endif
-}
-__DEVICE__
-inline
-double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
-__DEVICE__
-inline
-double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
-__DEVICE__
-inline
-double norm(int dim, const double* a)
-{   // TODO: placeholder until OCML adds support.
-    double r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_sqrt_f64(r);
-}
-__DEVICE__
-inline
-double norm3d(double x, double y, double z)
-{
-    return __ocml_len3_f64(x, y, z);
-}
-__DEVICE__
-inline
-double norm4d(double x, double y, double z, double w)
-{
-    return __ocml_len4_f64(x, y, z, w);
-}
-__DEVICE__
-inline
-double normcdf(double x) { return __ocml_ncdf_f64(x); }
-__DEVICE__
-inline
-double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
-__DEVICE__
-inline
-double pow(double x, double y) { return __ocml_pow_f64(x, y); }
-__DEVICE__
-inline
-double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); }
-__DEVICE__
-inline
-double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
-__DEVICE__
-inline
-double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
-__DEVICE__
-inline
-double remquo(double x, double y, int* quo)
-{
-    int tmp;
-    double r =
-        __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
-    *quo = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
-__DEVICE__
-inline
-double rint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-double rnorm(int dim, const double* a)
-{   // TODO: placeholder until OCML adds support.
-    double r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_rsqrt_f64(r);
-}
-__DEVICE__
-inline
-double rnorm3d(double x, double y, double z)
-{
-    return __ocml_rlen3_f64(x, y, z);
-}
-__DEVICE__
-inline
-double rnorm4d(double x, double y, double z, double w)
-{
-    return __ocml_rlen4_f64(x, y, z, w);
-}
-__DEVICE__
-inline
-double round(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
-__DEVICE__
-inline
-double scalbln(double x, long int n)
-{
-    return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
-}
-__DEVICE__
-inline
-double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
-__DEVICE__
-inline
-__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
-__DEVICE__
-inline
-double sin(double x) { return __ocml_sin_f64(x); }
-__DEVICE__
-inline
-void sincos(double x, double* sptr, double* cptr)
-{
-    double tmp;
-    *sptr =
-        __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-void sincospi(double x, double* sptr, double* cptr)
-{
-    double tmp;
-    *sptr = __ocml_sincospi_f64(
-        x, (__attribute__((address_space(5))) double*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-double sinh(double x) { return __ocml_sinh_f64(x); }
-__DEVICE__
-inline
-double sinpi(double x) { return __ocml_sinpi_f64(x); }
-__DEVICE__
-inline
-double sqrt(double x) { return __ocml_sqrt_f64(x); }
-__DEVICE__
-inline
-double tan(double x) { return __ocml_tan_f64(x); }
-__DEVICE__
-inline
-double tanh(double x) { return __ocml_tanh_f64(x); }
-__DEVICE__
-inline
-double tgamma(double x) { return __ocml_tgamma_f64(x); }
-__DEVICE__
-inline
-double trunc(double x) { return __ocml_trunc_f64(x); }
-__DEVICE__
-inline
-double y0(double x) { return __ocml_y0_f64(x); }
-__DEVICE__
-inline
-double y1(double x) { return __ocml_y1_f64(x); }
-__DEVICE__
-inline
-double yn(int n, double x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case. Placeholder until OCML adds
-    //       support.
-    if (n == 0) return j0f(x);
-    if (n == 1) return j1f(x);
-
-    double x0 = j0f(x);
-    double x1 = j1f(x);
-    for (int i = 1; i < n; ++i) {
-        double x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-
-// BEGIN INTRINSICS
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dadd_rn(double x, double y) { return x + y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __ddiv_rn(double x, double y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dmul_rn(double x, double y) { return x * y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); }
-#endif
-__DEVICE__
-inline
-double __drcp_rn(double x) { return __llvm_amdgcn_rcp_f64(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); }
-__DEVICE__
-inline
-double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
-__DEVICE__
-inline
-double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
-#endif
-__DEVICE__
-inline
-double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
-__DEVICE__
-inline
-double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
-__DEVICE__
-inline
-double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dsub_rn(double x, double y) { return x - y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __fma_rd(double x, double y, double z)
-{
-    return __ocml_fma_rtn_f64(x, y, z);
-}
-#endif
-__DEVICE__
-inline
-double __fma_rn(double x, double y, double z)
-{
-    return __ocml_fma_f64(x, y, z);
-}
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __fma_ru(double x, double y, double z)
-{
-    return __ocml_fma_rtp_f64(x, y, z);
-}
-__DEVICE__
-inline
-double __fma_rz(double x, double y, double z)
-{
-    return __ocml_fma_rtz_f64(x, y, z);
-}
-#endif
-// END INTRINSICS
-// END DOUBLE
-
-// BEGIN INTEGER
-__DEVICE__
-inline
-int abs(int x)
-{
-    int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
-    return (x ^ sgn) - sgn;
-}
-__DEVICE__
-inline
-long labs(long x)
-{
-    long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
-    return (x ^ sgn) - sgn;
-}
-__DEVICE__
-inline
-long long llabs(long long x)
-{
-    long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
-    return (x ^ sgn) - sgn;
-}
-
-#if defined(__cplusplus)
-    __DEVICE__
-    inline
-    long abs(long x) { return labs(x); }
-    __DEVICE__
-    inline
-    long long abs(long long x) { return llabs(x); }
-#endif
-// END INTEGER
-
-__DEVICE__
-inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
-    return __ocml_fma_f16(x, y, z);
-}
-
-__DEVICE__
-inline float fma(float x, float y, float z) {
-    return fmaf(x, y, z);
-}
-
-#pragma push_macro("__DEF_FLOAT_FUN")
-#pragma push_macro("__DEF_FLOAT_FUN2")
-#pragma push_macro("__DEF_FLOAT_FUN2I")
-#pragma push_macro("__HIP_OVERLOAD")
-#pragma push_macro("__HIP_OVERLOAD2")
-
-// __hip_enable_if::type is a type function which returns __T if __B is true.
-template<bool __B, class __T = void>
-struct __hip_enable_if {};
-
-template <class __T> struct __hip_enable_if<true, __T> {
-  typedef __T type;
-};
-
-// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
-// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
-// floor(double).
-#define __HIP_OVERLOAD1(__retty, __fn)                                         \
-  template <typename __T>                                                      \
-  __DEVICE__                                                                   \
-      typename __hip_enable_if<std::numeric_limits<__T>::is_integer,           \
-                                      __retty>::type                           \
-      __fn(__T __x) {                                                          \
-    return ::__fn((double)__x);                                                \
-  }
-
-// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
-// or integer argument to avoid compilation error due to ambibuity. e.g.
-// max(5.0f, 6.0) is resolved with max(double, double).
-#define __HIP_OVERLOAD2(__retty, __fn)                                         \
-  template <typename __T1, typename __T2>                                      \
-  __DEVICE__ typename __hip_enable_if<                                         \
-      std::numeric_limits<__T1>::is_specialized &&                             \
-          std::numeric_limits<__T2>::is_specialized,                           \
-      __retty>::type                                                           \
-  __fn(__T1 __x, __T2 __y) {                                                   \
-    return __fn((double)__x, (double)__y);                                     \
-  }
-
-// Define cmath functions with float argument and returns float.
-#define __DEF_FUN1(retty, func) \
-__DEVICE__ \
-inline \
-float func(float x) \
-{ \
-  return func##f(x); \
-} \
-__HIP_OVERLOAD1(retty, func)
-
-// Define cmath functions with float argument and returns retty.
-#define __DEF_FUNI(retty, func) \
-__DEVICE__ \
-inline \
-retty func(float x) \
-{ \
-  return func##f(x); \
-} \
-__HIP_OVERLOAD1(retty, func)
-
-// define cmath functions with two float arguments.
-#define __DEF_FUN2(retty, func) \
-__DEVICE__ \
-inline \
-float func(float x, float y) \
-{ \
-  return func##f(x, y); \
-} \
-__HIP_OVERLOAD2(retty, func)
-
-__DEF_FUN1(double, acos)
-__DEF_FUN1(double, acosh)
-__DEF_FUN1(double, asin)
-__DEF_FUN1(double, asinh)
-__DEF_FUN1(double, atan)
-__DEF_FUN2(double, atan2);
-__DEF_FUN1(double, atanh)
-__DEF_FUN1(double, cbrt)
-__DEF_FUN1(double, ceil)
-__DEF_FUN2(double, copysign);
-__DEF_FUN1(double, cos)
-__DEF_FUN1(double, cosh)
-__DEF_FUN1(double, erf)
-__DEF_FUN1(double, erfc)
-__DEF_FUN1(double, exp)
-__DEF_FUN1(double, exp2)
-__DEF_FUN1(double, expm1)
-__DEF_FUN1(double, fabs)
-__DEF_FUN2(double, fdim);
-__DEF_FUN1(double, floor)
-__DEF_FUN2(double, fmax);
-__DEF_FUN2(double, fmin);
-__DEF_FUN2(double, fmod);
-//__HIP_OVERLOAD1(int, fpclassify)
-__DEF_FUN2(double, hypot);
-__DEF_FUNI(int, ilogb)
-__HIP_OVERLOAD1(bool, isfinite)
-__HIP_OVERLOAD2(bool, isgreater);
-__HIP_OVERLOAD2(bool, isgreaterequal);
-__HIP_OVERLOAD1(bool, isinf);
-__HIP_OVERLOAD2(bool, isless);
-__HIP_OVERLOAD2(bool, islessequal);
-__HIP_OVERLOAD2(bool, islessgreater);
-__HIP_OVERLOAD1(bool, isnan);
-//__HIP_OVERLOAD1(bool, isnormal)
-__HIP_OVERLOAD2(bool, isunordered);
-__DEF_FUN1(double, lgamma)
-__DEF_FUN1(double, log)
-__DEF_FUN1(double, log10)
-__DEF_FUN1(double, log1p)
-__DEF_FUN1(double, log2)
-__DEF_FUN1(double, logb)
-__DEF_FUNI(long long, llrint)
-__DEF_FUNI(long long, llround)
-__DEF_FUNI(long, lrint)
-__DEF_FUNI(long, lround)
-__DEF_FUN1(double, nearbyint);
-__DEF_FUN2(double, nextafter);
-__DEF_FUN2(double, pow);
-__DEF_FUN2(double, remainder);
-__DEF_FUN1(double, rint);
-__DEF_FUN1(double, round);
-__HIP_OVERLOAD1(bool, signbit)
-__DEF_FUN1(double, sin)
-__DEF_FUN1(double, sinh)
-__DEF_FUN1(double, sqrt)
-__DEF_FUN1(double, tan)
-__DEF_FUN1(double, tanh)
-__DEF_FUN1(double, tgamma)
-__DEF_FUN1(double, trunc);
-
-// define cmath functions with a float and an integer argument.
-#define __DEF_FLOAT_FUN2I(func) \
-__DEVICE__ \
-inline \
-float func(float x, int y) \
-{ \
-  return func##f(x, y); \
-}
-__DEF_FLOAT_FUN2I(scalbn)
-__DEF_FLOAT_FUN2I(ldexp)
-
-template<class T>
-__DEVICE__ inline T min(T arg1, T arg2) {
-  return (arg1 < arg2) ? arg1 : arg2;
-}
-
-template<class T>
-__DEVICE__ inline T max(T arg1, T arg2) {
-  return (arg1 > arg2) ? arg1 : arg2;
-}
-
-#if __HCC__
-
-__DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
-  return min(arg1, (uint32_t) arg2);
-}
-/*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
-  return min((uint32_t) arg1, arg2);
-}
-
-__DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
-  return min(arg1, (uint64_t) arg2);
-}
-__DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
-  return min((uint64_t) arg1, arg2);
-}
-
-__DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
-  return min(arg1, (unsigned long long) arg2);
-}
-__DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
-  return min((unsigned long long) arg1, arg2);
-}*/
-
-__DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
-  return max(arg1, (uint32_t) arg2);
-}
-__DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
-  return max((uint32_t) arg1, arg2);
-}
-
-/*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
-  return max(arg1, (uint64_t) arg2);
-}
-__DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
-  return max((uint64_t) arg1, arg2);
-}
-
-__DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
-  return max(arg1, (unsigned long long) arg2);
-}
-__DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
-  return max((unsigned long long) arg1, arg2);
-}*/
-#else
-__DEVICE__ inline int min(int arg1, int arg2) {
-  return (arg1 < arg2) ? arg1 : arg2;
-}
-__DEVICE__ inline int max(int arg1, int arg2) {
-  return (arg1 > arg2) ? arg1 : arg2;
-}
-
-__DEVICE__ inline int min(uint32_t arg1, int arg2) {
-  return (arg1 < arg2) ? arg1 : arg2;
-}
-__DEVICE__ inline int max(uint32_t arg1, int arg2) {
-  return (arg1 > arg2) ? arg1 : arg2;
-}
-
-__DEVICE__
-inline
-float max(float x, float y) {
-  return fmaxf(x, y);
-}
-
-__DEVICE__
-inline
-double max(double x, double y) {
-  return fmax(x, y);
-}
-
-__DEVICE__
-inline
-float min(float x, float y) {
-  return fminf(x, y);
-}
-
-__DEVICE__
-inline
-double min(double x, double y) {
-  return fmin(x, y);
-}
-
-__HIP_OVERLOAD2(double, max)
-__HIP_OVERLOAD2(double, min)
-
-#endif
-
-__host__ inline static int min(int arg1, int arg2) {
-  return std::min(arg1, arg2);
-}
-
-__host__ inline static int max(int arg1, int arg2) {
-  return std::max(arg1, arg2);
-}
-
-__DEVICE__
-inline float pow(float base, int iexp) {
-  return powif(base, iexp);
-}
-
-__DEVICE__
-inline double pow(double base, int iexp) {
-  return powi(base, iexp);
-}
-
-__DEVICE__
-inline _Float16 pow(_Float16 base, int iexp) {
-  return __ocml_pown_f16(base, iexp);
-}
-
-#pragma pop_macro("__DEF_FLOAT_FUN")
-#pragma pop_macro("__DEF_FLOAT_FUN2")
-#pragma pop_macro("__DEF_FLOAT_FUN2I")
-#pragma pop_macro("__HIP_OVERLOAD")
-#pragma pop_macro("__HIP_OVERLOAD2")
-
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-#pragma pop_macro("__DEVICE__")
-#pragma pop_macro("__RETURN_TYPE")
-
-// For backward compatibility.
-// There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
-// defined after including math_functions.h.
-#include <hip/hcc_detail/hip_runtime.h>
diff --git a/third_party/rocm/include/hip/hcc_detail/math_fwd.h b/third_party/rocm/include/hip/hcc_detail/math_fwd.h
deleted file mode 100644
index c197af8..0000000
--- a/third_party/rocm/include/hip/hcc_detail/math_fwd.h
+++ /dev/null
@@ -1,714 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include "host_defines.h"
-#if defined(__cplusplus)
-    extern "C" {
-#endif
-
-// DOT FUNCTIONS
-#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
-__device__
-__attribute__((const))
-int __ockl_sdot2(
-    HIP_vector_base<short, 2>::Native_vec_,
-    HIP_vector_base<short, 2>::Native_vec_,
-    int, bool);
-
-__device__
-__attribute__((const))
-unsigned int __ockl_udot2(
-    HIP_vector_base<unsigned short, 2>::Native_vec_,
-    HIP_vector_base<unsigned short, 2>::Native_vec_,
-    unsigned int, bool);
-
-__device__
-__attribute__((const))
-int __ockl_sdot4(
-    HIP_vector_base<char, 4>::Native_vec_,
-    HIP_vector_base<char, 4>::Native_vec_,
-    int, bool);
-
-__device__
-__attribute__((const))
-unsigned int __ockl_udot4(
-    HIP_vector_base<unsigned char, 4>::Native_vec_,
-    HIP_vector_base<unsigned char, 4>::Native_vec_,
-    unsigned int, bool);
-
-__device__
-__attribute__((const))
-int __ockl_sdot8(int, int, int, bool);
-
-__device__
-__attribute__((const))
-unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool);
-#endif
-
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-// BEGIN FLOAT
-__device__
-__attribute__((const))
-float __ocml_acos_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_acosh_f32(float);
-__device__
-__attribute__((const))
-float __ocml_asin_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_asinh_f32(float);
-__device__
-__attribute__((const))
-float __ocml_atan2_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_atan_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_atanh_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_cbrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_ceil_f32(float);
-__device__
-__attribute__((const))
-__device__
-float __ocml_copysign_f32(float, float);
-__device__
-float __ocml_cos_f32(float);
-__device__
-float __ocml_native_cos_f32(float);
-__device__
-__attribute__((pure))
-__device__
-float __ocml_cosh_f32(float);
-__device__
-float __ocml_cospi_f32(float);
-__device__
-float __ocml_i0_f32(float);
-__device__
-float __ocml_i1_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfc_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfcinv_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfcx_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erf_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_erfinv_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_exp10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_exp10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_exp2_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_exp_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_exp_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_expm1_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fabs_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fdim_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_floor_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fma_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fmax_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_fmin_f32(float, float);
-__device__
-__attribute__((const))
-__device__
-float __ocml_fmod_f32(float, float);
-__device__
-float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-float __ocml_hypot_f32(float, float);
-__device__
-__attribute__((const))
-int __ocml_ilogb_f32(float);
-__device__
-__attribute__((const))
-int __ocml_isfinite_f32(float);
-__device__
-__attribute__((const))
-int __ocml_isinf_f32(float);
-__device__
-__attribute__((const))
-int __ocml_isnan_f32(float);
-__device__
-float __ocml_j0_f32(float);
-__device__
-float __ocml_j1_f32(float);
-__device__
-__attribute__((const))
-float __ocml_ldexp_f32(float, int);
-__device__
-float __ocml_lgamma_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_log10_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log1p_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log2_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_log2_f32(float);
-__device__
-__attribute__((const))
-float __ocml_logb_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_log_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_native_log_f32(float);
-__device__
-float __ocml_modf_f32(float, __attribute__((address_space(5))) float*);
-__device__
-__attribute__((const))
-float __ocml_nearbyint_f32(float);
-__device__
-__attribute__((const))
-float __ocml_nextafter_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_len3_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_len4_f32(float, float, float, float);
-__device__
-__attribute__((pure))
-float __ocml_ncdf_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_ncdfinv_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_pow_f32(float, float);
-__device__
-__attribute__((pure))
-float __ocml_pown_f32(float, int);
-__device__
-__attribute__((pure))
-float __ocml_rcbrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_remainder_f32(float, float);
-__device__
-float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-float __ocml_rhypot_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_rint_f32(float);
-__device__
-__attribute__((const))
-float __ocml_rlen3_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_rlen4_f32(float, float, float, float);
-__device__
-__attribute__((const))
-float __ocml_round_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_rsqrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_scalb_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_scalbn_f32(float, int);
-__device__
-__attribute__((const))
-int __ocml_signbit_f32(float);
-__device__
-float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*);
-__device__
-float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*);
-__device__
-float __ocml_sin_f32(float);
-__device__
-float __ocml_native_sin_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_sinh_f32(float);
-__device__
-float __ocml_sinpi_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_f32(float);
-__device__
-__attribute__((const))
-float __ocml_native_sqrt_f32(float);
-__device__
-float __ocml_tan_f32(float);
-__device__
-__attribute__((pure))
-float __ocml_tanh_f32(float);
-__device__
-float __ocml_tgamma_f32(float);
-__device__
-__attribute__((const))
-float __ocml_trunc_f32(float);
-__device__
-float __ocml_y0_f32(float);
-__device__
-float __ocml_y1_f32(float);
-
-// BEGIN INTRINSICS
-__device__
-__attribute__((const))
-float __ocml_add_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_add_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_add_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_add_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sub_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_mul_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rte_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rtn_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rtp_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_div_rtz_f32(float, float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rte_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rtn_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rtp_f32(float);
-__device__
-__attribute__((const))
-float __ocml_sqrt_rtz_f32(float);
-__device__
-__attribute__((const))
-float __ocml_fma_rte_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fma_rtn_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fma_rtp_f32(float, float, float);
-__device__
-__attribute__((const))
-float __ocml_fma_rtz_f32(float, float, float);
-
-__device__
-__attribute__((const))
-float __llvm_amdgcn_cos_f32(float) __asm("llvm.amdgcn.cos.f32");
-__device__
-__attribute__((const))
-float __llvm_amdgcn_rcp_f32(float) __asm("llvm.amdgcn.rcp.f32");
-__device__
-__attribute__((const))
-float __llvm_amdgcn_rsq_f32(float) __asm("llvm.amdgcn.rsq.f32");
-__device__
-__attribute__((const))
-float __llvm_amdgcn_sin_f32(float) __asm("llvm.amdgcn.sin.f32");
-// END INTRINSICS
-// END FLOAT
-
-// BEGIN DOUBLE
-__device__
-__attribute__((const))
-double __ocml_acos_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_acosh_f64(double);
-__device__
-__attribute__((const))
-double __ocml_asin_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_asinh_f64(double);
-__device__
-__attribute__((const))
-double __ocml_atan2_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_atan_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_atanh_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_cbrt_f64(double);
-__device__
-__attribute__((const))
-double __ocml_ceil_f64(double);
-__device__
-__attribute__((const))
-double __ocml_copysign_f64(double, double);
-__device__
-double __ocml_cos_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_cosh_f64(double);
-__device__
-double __ocml_cospi_f64(double);
-__device__
-double __ocml_i0_f64(double);
-__device__
-double __ocml_i1_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfc_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfcinv_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfcx_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erf_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_erfinv_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_exp10_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_exp2_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_exp_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_expm1_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fabs_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fdim_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_floor_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fma_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fmax_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_fmin_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_fmod_f64(double, double);
-__device__
-double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-double __ocml_hypot_f64(double, double);
-__device__
-__attribute__((const))
-int __ocml_ilogb_f64(double);
-__device__
-__attribute__((const))
-int __ocml_isfinite_f64(double);
-__device__
-__attribute__((const))
-int __ocml_isinf_f64(double);
-__device__
-__attribute__((const))
-int __ocml_isnan_f64(double);
-__device__
-double __ocml_j0_f64(double);
-__device__
-double __ocml_j1_f64(double);
-__device__
-__attribute__((const))
-double __ocml_ldexp_f64(double, int);
-__device__
-double __ocml_lgamma_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log10_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log1p_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log2_f64(double);
-__device__
-__attribute__((const))
-double __ocml_logb_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_log_f64(double);
-__device__
-double __ocml_modf_f64(double, __attribute__((address_space(5))) double*);
-__device__
-__attribute__((const))
-double __ocml_nearbyint_f64(double);
-__device__
-__attribute__((const))
-double __ocml_nextafter_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_len3_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_len4_f64(double, double, double, double);
-__device__
-__attribute__((pure))
-double __ocml_ncdf_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_ncdfinv_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_pow_f64(double, double);
-__device__
-__attribute__((pure))
-double __ocml_pown_f64(double, int);
-__device__
-__attribute__((pure))
-double __ocml_rcbrt_f64(double);
-__device__
-__attribute__((const))
-double __ocml_remainder_f64(double, double);
-__device__
-double __ocml_remquo_f64(
-    double, double, __attribute__((address_space(5))) int*);
-__device__
-__attribute__((const))
-double __ocml_rhypot_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_rint_f64(double);
-__device__
-__attribute__((const))
-double __ocml_rlen3_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_rlen4_f64(double, double, double, double);
-__device__
-__attribute__((const))
-double __ocml_round_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_rsqrt_f64(double);
-__device__
-__attribute__((const))
-double __ocml_scalb_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_scalbn_f64(double, int);
-__device__
-__attribute__((const))
-int __ocml_signbit_f64(double);
-__device__
-double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*);
-__device__
-double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*);
-__device__
-double __ocml_sin_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_sinh_f64(double);
-__device__
-double __ocml_sinpi_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_f64(double);
-__device__
-double __ocml_tan_f64(double);
-__device__
-__attribute__((pure))
-double __ocml_tanh_f64(double);
-__device__
-double __ocml_tgamma_f64(double);
-__device__
-__attribute__((const))
-double __ocml_trunc_f64(double);
-__device__
-double __ocml_y0_f64(double);
-__device__
-double __ocml_y1_f64(double);
-
-// BEGIN INTRINSICS
-__device__
-__attribute__((const))
-double __ocml_add_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_add_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_add_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_add_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sub_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_mul_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rte_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rtn_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rtp_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_div_rtz_f64(double, double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rte_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rtn_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rtp_f64(double);
-__device__
-__attribute__((const))
-double __ocml_sqrt_rtz_f64(double);
-__device__
-__attribute__((const))
-double __ocml_fma_rte_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fma_rtn_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fma_rtp_f64(double, double, double);
-__device__
-__attribute__((const))
-double __ocml_fma_rtz_f64(double, double, double);
-
-__device__
-__attribute__((const))
-double __llvm_amdgcn_rcp_f64(double) __asm("llvm.amdgcn.rcp.f64");
-__device__
-__attribute__((const))
-double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64");
-// END INTRINSICS
-// END DOUBLE
-
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
-#if defined(__cplusplus)
-    } // extern "C"
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/ockl_image.h b/third_party/rocm/include/hip/hcc_detail/ockl_image.h
deleted file mode 100644
index b32b23f..0000000
--- a/third_party/rocm/include/hip/hcc_detail/ockl_image.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include <hip/hip_vector_types.h>
-
-extern "C" {
-
-#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4)))
-
-__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c);
-
-__device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT*i, int c);
-
-__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f);
-
-__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l);
-
-__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l);
-
-__device__ void __ockl_image_store_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
-
-__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
-
-__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
-
-};
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/hcc_detail/program_state.hpp b/third_party/rocm/include/hip/hcc_detail/program_state.hpp
deleted file mode 100644
index 6128a4c..0000000
--- a/third_party/rocm/include/hip/hcc_detail/program_state.hpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#include <hsa/amd_hsa_kernel_code.h>
-#include <hsa/hsa.h>
-#include <hsa/hsa_ext_amd.h>
-#include <hsa/hsa_ven_amd_loader.h>
-
-#include <cstddef>
-#include <cstdint>
-#include <cstdlib>
-
-#include <hip/hip_common.h>
-
-struct ihipModuleSymbol_t;
-using hipFunction_t = ihipModuleSymbol_t*;
-
-namespace hip_impl {
-
-// This section contains internal APIs that
-// needs to be exported
-#ifdef __GNUC__
-#pragma GCC visibility push (default)
-#endif
-
-struct kernarg_impl;
-class kernarg {
-public:
-    kernarg();
-    kernarg(kernarg&&);
-    ~kernarg();
-    std::uint8_t* data();
-    std::size_t   size();
-    void reserve(std::size_t);
-    void resize(std::size_t);
-private:
-    kernarg_impl* impl;
-};
-
-class kernargs_size_align;
-class program_state_impl;
-class program_state {
-public:
-    program_state();
-    ~program_state();
-    program_state(const program_state&) = delete;
-
-    hipFunction_t kernel_descriptor(std::uintptr_t,
-                                    hsa_agent_t);
-
-    kernargs_size_align get_kernargs_size_align(std::uintptr_t);
-    hsa_executable_t load_executable(const char*, const size_t,
-                                     hsa_executable_t,
-                                     hsa_agent_t);
-    hsa_executable_t load_executable_no_copy(const char*, const size_t,
-                                             hsa_executable_t,
-                                             hsa_agent_t);
-
-    void* global_addr_by_name(const char* name);
-
-private:
-    friend class agent_globals_impl;
-    program_state_impl* impl;
-};
-
-class kernargs_size_align {
-public:
-    std::size_t size(std::size_t n) const;
-    std::size_t alignment(std::size_t n) const;
-    const void* getHandle() const {return handle;};
-private:
-    const void* handle;
-    friend kernargs_size_align program_state::get_kernargs_size_align(std::uintptr_t);
-};
-
-#ifdef __GNUC__
-#pragma GCC visibility pop
-#endif
-
-inline
-__attribute__((visibility("hidden")))
-program_state& get_program_state() {
-    static program_state ps;
-    return ps;
-}
-}  // Namespace hip_impl.
diff --git a/third_party/rocm/include/hip/hcc_detail/surface_functions.h b/third_party/rocm/include/hip/hcc_detail/surface_functions.h
deleted file mode 100644
index b9cab1f..0000000
--- a/third_party/rocm/include/hip/hcc_detail/surface_functions.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
-Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H
-
-#include <hip/hcc_detail/hip_surface_types.h>
-
-#define __SURFACE_FUNCTIONS_DECL__ static inline __device__
-template <class T>
-__SURFACE_FUNCTIONS_DECL__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y,
-                                           int boundaryMode = hipBoundaryModeZero) {
-    hipArray* arrayPtr = (hipArray*)surfObj;
-    size_t width = arrayPtr->width;
-    size_t height = arrayPtr->height;
-    int32_t xOffset = x / sizeof(T);
-    T* dataPtr = (T*)arrayPtr->data;
-    if ((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0)) {
-        if (boundaryMode == hipBoundaryModeZero) {
-            *data = 0;
-        }
-    } else {
-        *data = *(dataPtr + y * width + xOffset);
-    }
-}
-
-template <class T>
-__SURFACE_FUNCTIONS_DECL__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y,
-                                            int boundaryMode = hipBoundaryModeZero) {
-    hipArray* arrayPtr = (hipArray*)surfObj;
-    size_t width = arrayPtr->width;
-    size_t height = arrayPtr->height;
-    int32_t xOffset = x / sizeof(T);
-    T* dataPtr = (T*)arrayPtr->data;
-    if (!((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0))) {
-        *(dataPtr + y * width + xOffset) = data;
-    }
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_fetch_functions.h b/third_party/rocm/include/hip/hcc_detail/texture_fetch_functions.h
deleted file mode 100644
index 03c1780..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_fetch_functions.h
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#if defined(__cplusplus)
-
-#include <hip/hip_vector_types.h>
-#include <hip/texture_types.h>
-#include <hip/hcc_detail/ockl_image.h>
-
-#include <type_traits>
-
-#define TEXTURE_PARAMETERS_INIT                                                                     \
-    unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \
-    unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;
-
-template<typename T>
-struct __hip_is_tex_channel_type
-{
-    static constexpr bool value =
-        std::is_same<T, char>::value ||
-        std::is_same<T, unsigned char>::value ||
-        std::is_same<T, short>::value ||
-        std::is_same<T, unsigned short>::value ||
-        std::is_same<T, int>::value ||
-        std::is_same<T, unsigned int>::value ||
-        std::is_same<T, float>::value;
-};
-
-template<
-    typename T,
-    unsigned int rank>
-struct __hip_is_tex_channel_type<HIP_vector_type<T, rank>>
-{
-    static constexpr bool value =
-        __hip_is_tex_channel_type<T>::value &&
-        ((rank == 1) ||
-         (rank == 2) ||
-         (rank == 4));
-};
-
-template<typename T>
-struct __hip_is_tex_normalized_channel_type
-{
-    static constexpr bool value =
-        std::is_same<T, char>::value ||
-        std::is_same<T, unsigned char>::value ||
-        std::is_same<T, short>::value ||
-        std::is_same<T, unsigned short>::value;
-};
-
-template<
-    typename T,
-    unsigned int rank>
-struct __hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>
-{
-    static constexpr bool value =
-        __hip_is_tex_normalized_channel_type<T>::value &&
-        ((rank == 1) ||
-         (rank == 2) ||
-         (rank == 4));
-};
-
-template <
-    typename T,
-    hipTextureReadMode readMode,
-    typename Enable = void>
-struct __hip_tex_ret
-{
-    static_assert(std::is_same<Enable, void>::value, "Invalid channel type!");
-};
-
-template <
-    typename T,
-    hipTextureReadMode readMode>
-using __hip_tex_ret_t = typename __hip_tex_ret<T, readMode, bool>::type;
-
-template <typename T>
-struct __hip_tex_ret<
-    T,
-    hipReadModeElementType,
-    typename std::enable_if<__hip_is_tex_channel_type<T>::value, bool>::type>
-{
-    using type = T;
-};
-
-template<
-    typename T,
-    unsigned int rank>
-struct __hip_tex_ret<
-    HIP_vector_type<T, rank>,
-    hipReadModeElementType,
-    typename std::enable_if<__hip_is_tex_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
-{
-    using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeElementType>, rank>;
-};
-
-template<typename T>
-struct __hip_tex_ret<
-    T,
-    hipReadModeNormalizedFloat,
-    typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type>
-{
-    using type = float;
-};
-
-template<
-    typename T,
-    unsigned int rank>
-struct __hip_tex_ret<
-    HIP_vector_type<T, rank>,
-    hipReadModeNormalizedFloat,
-    typename std::enable_if<__hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
-{
-    using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeNormalizedFloat>, rank>;
-};
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1Dfetch(texture<T, hipTextureType1D, readMode> t, int x)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_load_1Db(i, x);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1D(texture<T, hipTextureType1D, readMode> t, float x)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_1D(i, s, x);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2D(texture<T, hipTextureType2D, readMode> t, float x, float y)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayered(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayered(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3D(texture<T, hipTextureType3D, readMode> t, float x, float y, float z)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemap(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLod(texture<T, hipTextureType1D, readMode> t, float x, float level)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_lod_1D(i, s, x, level);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLod(texture<T, hipTextureType2D, readMode> t, float x, float y, float level)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayeredLod(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float level)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayeredLod(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float level)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3DLod(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float level)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLod(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float level)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayered(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayeredLod(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float level)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapGrad(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
-    TEXTURE_PARAMETERS_INIT;
-    // TODO missing in device libs.
-    // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
-    // return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-    return {};
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> texCubemapLayeredGrad(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
-{
-    TEXTURE_PARAMETERS_INIT;
-    // TODO missing in device libs.
-    // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
-    // return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-    return {};
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DGrad(texture<T, hipTextureType1D, readMode> t, float x, float dPdx, float dPdy)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DGrad(texture<T, hipTextureType2D, readMode> t, float x, float y, float2 dPdx, float2 dPdy)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data,  float2(dPdy.x, dPdy.y).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex1DLayeredGrad(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float dPdx, float dPdy)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex2DLayeredGrad(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex_ret_t<T, readMode> tex3DGrad(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
-    TEXTURE_PARAMETERS_INIT;
-    auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
-    return *reinterpret_cast<__hip_tex_ret_t<T, readMode>*>(&tmp);
-}
-
-template <
-    typename T,
-    hipTextureReadMode readMode,
-    typename Enable = void>
-struct __hip_tex2dgather_ret
-{
-    static_assert(std::is_same<Enable, void>::value, "Invalid channel type!");
-};
-
-template <
-    typename T,
-    hipTextureReadMode readMode>
-using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret<T, readMode, bool>::type;
-
-template <typename T>
-struct __hip_tex2dgather_ret<
-    T,
-    hipReadModeElementType,
-    typename std::enable_if<__hip_is_tex_channel_type<T>::value, bool>::type>
-{
-    using type = HIP_vector_type<T, 4>;
-};
-
-template<
-    typename T,
-    unsigned int rank>
-struct __hip_tex2dgather_ret<
-    HIP_vector_type<T, rank>,
-    hipReadModeElementType,
-    typename std::enable_if<__hip_is_tex_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
-{
-    using type = HIP_vector_type<T, 4>;
-};
-
-template <typename T>
-struct __hip_tex2dgather_ret<
-    T,
-    hipReadModeNormalizedFloat,
-    typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type>
-{
-    using type = float4;
-};
-
-template <typename T, hipTextureReadMode readMode>
-static __forceinline__ __device__ __hip_tex2dgather_ret_t<T, readMode> tex2Dgather(texture<T, hipTextureType2D, readMode> t, float x, float y, int comp=0)
-{
-    TEXTURE_PARAMETERS_INIT;
-    switch (comp) {
-    case 1: {
-        auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
-    }
-    case 2: {
-        auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
-    }
-    case 3: {
-        auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
-    }
-    default: {
-        auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<__hip_tex2dgather_ret_t<T, readMode>*>(&tmp);
-    }
-    }
-    return {};
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_functions.h b/third_party/rocm/include/hip/hcc_detail/texture_functions.h
deleted file mode 100644
index 4a84507..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_functions.h
+++ /dev/null
@@ -1,11102 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H
-#include <hip/hcc_detail/hip_vector_types.h>
-#include <hip/hcc_detail/hip_texture_types.h>
-
-#pragma push_macro("TYPEDEF_VECTOR_VALUE_TYPE")
-#define TYPEDEF_VECTOR_VALUE_TYPE(SCALAR_TYPE) \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##2_vector_value_type __attribute__((ext_vector_type(2))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##3_vector_value_type __attribute__((ext_vector_type(3))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##4_vector_value_type __attribute__((ext_vector_type(4))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##8_vector_value_type __attribute__((ext_vector_type(8))); \
-typedef SCALAR_TYPE __hip_##SCALAR_TYPE##16_vector_value_type __attribute__((ext_vector_type(16)));
-
-TYPEDEF_VECTOR_VALUE_TYPE(float);
-TYPEDEF_VECTOR_VALUE_TYPE(int);
-TYPEDEF_VECTOR_VALUE_TYPE(uint);
-
-#undef TYPEDEF_VECTOR_VALUE_TYPE
-#pragma pop_macro("TYPEDEF_VECTOR_VALUE_TYPE")
-
-union TData {
-    __hip_float4_vector_value_type f;
-    __hip_int4_vector_value_type i;
-    __hip_uint4_vector_value_type u;
-};
-
-#define __TEXTURE_FUNCTIONS_DECL__ static inline __device__
-
-
-#if (__hcc_workweek__ >= 18114) || __clang__
-#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4)))
-#else
-#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(2)))
-#endif
-
-#define TEXTURE_PARAMETERS_INIT                                                                    \
-    unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject;  \
-    unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;                  \
-    TData texel;
-#define TEXTURE_REF_PARAMETERS_INIT                                                                      \
-    unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)texRef.textureObject; \
-    unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;                        \
-    TData texel;
-#define TEXTURE_SET_FLOAT *retVal = texel.f.x;
-
-#define TEXTURE_SET_SIGNED *retVal = texel.i.x;
-
-#define TEXTURE_SET_UNSIGNED *retVal = texel.u.x;
-
-#define TEXTURE_SET_FLOAT_X retVal->x = texel.f.x;
-
-#define TEXTURE_SET_SIGNED_X retVal->x = texel.i.x;
-
-#define TEXTURE_SET_UNSIGNED_X retVal->x = texel.u.x;
-
-#define TEXTURE_SET_FLOAT_XY                                                                       \
-    retVal->x = texel.f.x;                                                                         \
-    retVal->y = texel.f.y;
-
-#define TEXTURE_SET_SIGNED_XY                                                                      \
-    retVal->x = texel.i.x;                                                                         \
-    retVal->y = texel.i.y;
-
-#define TEXTURE_SET_UNSIGNED_XY                                                                    \
-    retVal->x = texel.u.x;                                                                         \
-    retVal->y = texel.u.y;
-
-#define TEXTURE_SET_FLOAT_XYZW                                                                     \
-    retVal->x = texel.f.x;                                                                         \
-    retVal->y = texel.f.y;                                                                         \
-    retVal->z = texel.f.z;                                                                         \
-    retVal->w = texel.f.w;
-
-#define TEXTURE_SET_SIGNED_XYZW                                                                    \
-    retVal->x = texel.i.x;                                                                         \
-    retVal->y = texel.i.y;                                                                         \
-    retVal->z = texel.i.z;                                                                         \
-    retVal->w = texel.i.w;
-
-#define TEXTURE_SET_UNSIGNED_XYZW                                                                  \
-    retVal->x = texel.u.x;                                                                         \
-    retVal->y = texel.u.y;                                                                         \
-    retVal->z = texel.u.z;                                                                         \
-    retVal->w = texel.u.w;
-
-#define TEXTURE_RETURN_CHAR return texel.i.x;
-
-#define TEXTURE_RETURN_UCHAR return texel.u.x;
-
-#define TEXTURE_RETURN_SHORT return texel.i.x;
-
-#define TEXTURE_RETURN_USHORT return texel.u.x;
-
-#define TEXTURE_RETURN_INT return texel.i.x;
-
-#define TEXTURE_RETURN_UINT return texel.u.x;
-
-#define TEXTURE_RETURN_SIGNED return texel.i.x;
-
-#define TEXTURE_RETURN_UNSIGNED return texel.u.x;
-
-#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x);
-
-#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x);
-
-#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x);
-
-#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x);
-
-#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x);
-
-#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x);
-
-#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y);
-
-#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y);
-
-#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y);
-
-#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y);
-
-#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y);
-
-#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y);
-
-#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
-
-#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
-
-#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
-
-#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
-
-#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
-
-#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
-
-#define TEXTURE_RETURN_FLOAT return texel.f.x;
-
-#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x);
-
-#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y);
-
-#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
-
-extern "C" {
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_1D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    float c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_1Da(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_2D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c);
-
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_2Da(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c);
-
-__device__
-float __ockl_image_sample_2Dad(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c);
-
-__device__
-float __ockl_image_sample_2Dd(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_3D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_1D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    float c, float dx, float dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_1Da(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c, float dx, float dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_2D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_2Da(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-float __ockl_image_sample_grad_2Dad(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-float __ockl_image_sample_grad_2Dd(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c, __hip_float2_vector_value_type dx, __hip_float2_vector_value_type dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_grad_3D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c, __hip_float4_vector_value_type dx, __hip_float4_vector_value_type dy);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_1D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    float c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_1Da(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_2D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_2Da(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c, float l);
-
-__device__
-float __ockl_image_sample_lod_2Dad(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c, float l);
-
-__device__
-float __ockl_image_sample_lod_2Dd(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float2_vector_value_type c, float l);
-
-__device__
-__hip_float4_vector_value_type __ockl_image_sample_lod_3D(
-    unsigned int ADDRESS_SPACE_CONSTANT* i, unsigned int ADDRESS_SPACE_CONSTANT* s,
-    __hip_float4_vector_value_type c, float l);
-}
-
-////////////////////////////////////////////////////////////
-// Texture object APIs
-////////////////////////////////////////////////////////////
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char1* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char2* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char4* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned char* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar1* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar2* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar4* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short1* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short2* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short4* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned short* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort1* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort2* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort4* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int1* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int2* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int4* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned int* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint1* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint2* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint4* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float* retVal, hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float1* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float2* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float4* retVal, hipTextureObject_t textureObject,
-                                           int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1Dfetch(hipTextureObject_t textureObject, int x) {
-    T ret;
-    tex1Dfetch(&ret, textureObject, x);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char1* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char2* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(char4* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned char* retVal, hipTextureObject_t textureObject,
-                                      float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar1* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar2* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar4* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short1* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short2* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(short4* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned short* retVal, hipTextureObject_t textureObject,
-                                      float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort1* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort2* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort4* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int1* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int2* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(int4* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned int* retVal, hipTextureObject_t textureObject,
-                                      float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint1* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint2* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint4* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float1* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float2* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1D(float4* retVal, hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1D(hipTextureObject_t textureObject, float x) {
-    T ret;
-    tex1D(&ret, textureObject, x);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char1* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char2* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char4* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned char* retVal, hipTextureObject_t textureObject,
-                                         float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar1* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar2* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar4* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short1* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short2* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short4* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned short* retVal, hipTextureObject_t textureObject,
-                                         float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort1* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort2* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort4* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int1* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int2* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int4* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned int* retVal, hipTextureObject_t textureObject,
-                                         float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint1* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint2* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint4* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float1* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float2* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float4* retVal, hipTextureObject_t textureObject, float x,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) {
-    T ret;
-    tex1DLod(&ret, textureObject, x, level);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char1* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char2* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char4* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned char* retVal, hipTextureObject_t textureObject,
-                                          float x, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar1* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar2* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar4* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short1* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short2* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short4* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned short* retVal, hipTextureObject_t textureObject,
-                                          float x, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort1* retVal, hipTextureObject_t textureObject,
-                                          float x, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort2* retVal, hipTextureObject_t textureObject,
-                                          float x, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort4* retVal, hipTextureObject_t textureObject,
-                                          float x, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int1* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int2* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int4* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned int* retVal, hipTextureObject_t textureObject,
-                                          float x, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint1* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint2* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint4* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float1* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float2* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float4* retVal, hipTextureObject_t textureObject, float x,
-                                          float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dx,
-                                       float dy) {
-    T ret;
-    tex1DLod(&ret, textureObject, x, dx, dy);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(char4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned char* retVal, hipTextureObject_t textureObject,
-                                      float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(short4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned short* retVal, hipTextureObject_t textureObject,
-                                      float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(int4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned int* retVal, hipTextureObject_t textureObject,
-                                      float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2D(float4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex2D(hipTextureObject_t textureObject, float x, float y) {
-    T ret;
-    tex2D(&ret, textureObject, x, y);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned char* retVal, hipTextureObject_t textureObject,
-                                         float x, float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned short* retVal, hipTextureObject_t textureObject,
-                                         float x, float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned int* retVal, hipTextureObject_t textureObject,
-                                         float x, float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex2DLod(hipTextureObject_t textureObject, float x, float y,
-                                      float level) {
-    T ret;
-    tex2DLod(&ret, textureObject, x, y, level);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(char4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned char* retVal, hipTextureObject_t textureObject,
-                                      float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(short4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned short* retVal, hipTextureObject_t textureObject,
-                                      float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(int4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned int* retVal, hipTextureObject_t textureObject,
-                                      float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float1* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float2* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3D(float4* retVal, hipTextureObject_t textureObject, float x,
-                                      float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) {
-    T ret;
-    tex3D(&ret, textureObject, x, y, z);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned char* retVal, hipTextureObject_t textureObject,
-                                         float x, float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned short* retVal, hipTextureObject_t textureObject,
-                                         float x, float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned int* retVal, hipTextureObject_t textureObject,
-                                         float x, float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float1* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float2* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float4* retVal, hipTextureObject_t textureObject, float x,
-                                         float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z,
-                                      float level) {
-    T ret;
-    tex3DLod(&ret, textureObject, x, y, z, level);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char1* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char2* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char4* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned char* retVal,
-                                             hipTextureObject_t textureObject, float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar1* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar2* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar4* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short1* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short2* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short4* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned short* retVal,
-                                             hipTextureObject_t textureObject, float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort1* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort2* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort4* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int* retVal, hipTextureObject_t textureObject, float x,
-                                             int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int1* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int2* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int4* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned int* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint1* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint2* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint4* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float1* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float2* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_FLOAT_XY;
-}
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float4* retVal, hipTextureObject_t textureObject,
-                                             float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) {
-    T ret;
-    tex1DLayered(&ret, textureObject, x, layer);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char1* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char2* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char4* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned char* retVal,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short1* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short2* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short4* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned short* retVal,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int1* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int2* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int4* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned int* retVal,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint1* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint2* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint4* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float1* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float2* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float4* retVal, hipTextureObject_t textureObject,
-                                                float x, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer,
-                                             float level) {
-    T ret;
-    tex1DLayeredLod(&ret, textureObject, x, layer, level);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char1* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char2* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char4* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned char* retVal,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar1* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar2* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar4* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short1* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short2* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short4* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned short* retVal,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort1* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort2* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort4* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int1* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int2* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int4* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned int* retVal,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint1* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint2* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint4* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float1* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float2* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float4* retVal, hipTextureObject_t textureObject,
-                                                 float x, int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer,
-                                              float dx, float dy) {
-    T ret;
-    tex1DLayeredGrad(&ret, textureObject, x, layer, dx, dy);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char1* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char2* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char4* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned char* retVal,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar1* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar2* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar4* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short1* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short2* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short4* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned short* retVal,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort1* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort2* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort4* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int* retVal, hipTextureObject_t textureObject, float x,
-                                             float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int1* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int2* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int4* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned int* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint1* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint2* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint4* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float1* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float2* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float4* retVal, hipTextureObject_t textureObject,
-                                             float x, float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y,
-                                          int layer) {
-    T ret;
-    tex2DLayered(&ret, textureObject, x, y, layer);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char1* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char2* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char4* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned char* retVal,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar1* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar2* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar4* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short1* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short2* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short4* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned short* retVal,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort1* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort2* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort4* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int1* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int2* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int4* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_SIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned int* retVal,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint1* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint2* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint4* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_UNSIGNED_XYZW;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_FLOAT;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float1* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_FLOAT_X;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float2* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_FLOAT_XY;
-}
-
-__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float4* retVal, hipTextureObject_t textureObject,
-                                                float x, float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_SET_FLOAT_XYZW;
-}
-
-template <class T>
-__TEXTURE_FUNCTIONS_DECL__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y,
-                                             int layer, float level) {
-    T ret;
-    tex2DLayeredLod(&ret, textureObject, x, y, layer, level);
-    return ret;
-}
-
-////////////////////////////////////////////////////////////
-// Texture Reference APIs
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture<char, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture<char1, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture<char2, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture<char4, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture<unsigned char, texType, mode> texRef,
-                                                    int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture<uchar1, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture<uchar2, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture<uchar4, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture<short, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture<short1, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture<short2, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture<short4, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture<ushort1, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture<unsigned short, texType, mode> texRef,
-                                                     int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture<ushort2, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture<ushort4, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture<int1, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture<int, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture<int2, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture<int4, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture<unsigned int, texType, mode> texRef,
-                                                   int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1Dfetch(texture<uint1, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture<uint2, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture<uint4, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture<float, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture<float1, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture<float2, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture<float4, texType, mode> texRef, int x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture<char, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture<char1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture<char2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture<char4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture<unsigned char, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture<uchar1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture<uchar2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture<uchar4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture<short, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture<short1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture<short2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture<short4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture<ushort1, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture<unsigned short, texType, mode> texRef,
-                                                     hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture<ushort2, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture<ushort4, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture<int1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture<int, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture<int2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture<int4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture<unsigned int, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1Dfetch(texture<uint1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1Dfetch(texture<uint2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture<uint4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture<float, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture<float1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture<float2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture<float4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, int x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture<char, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture<char1, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1D(texture<char2, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture<char4, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture<unsigned char, texType, mode> texRef,
-                                               float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture<uchar1, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture<uchar2, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture<uchar4, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture<short, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture<short1, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture<short2, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture<short4, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture<unsigned short, texType, mode> texRef,
-                                                float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture<ushort1, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture<ushort2, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1D(texture<ushort4, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture<int, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture<int1, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture<int2, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture<int4, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture<unsigned int, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture<uint1, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture<uint2, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture<uint4, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture<float1, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture<float2, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture<float4, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture<char, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture<char1, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1D(texture<char2, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture<char4, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture<unsigned char, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture<uchar1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture<uchar2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture<uchar4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture<short, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture<short1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture<short2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture<short4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture<unsigned short, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture<ushort1, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture<ushort2, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1D(texture<ushort4, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture<int, texType, mode> texRef,
-                                     hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture<int1, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture<int2, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture<int4, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture<unsigned int, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture<uint1, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture<uint2, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture<uint4, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture<float, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT;
-}
-//////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture<float, texType, mode> texRef, float x) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture<float1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture<float2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture<float4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1D(i, s, x);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture<char, texType, mode> texRef, float x,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture<char1, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture<char2, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture<char4, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture<unsigned char, texType, mode> texRef,
-                                                  float x, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture<uchar1, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture<uchar2, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture<uchar4, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture<short, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture<short1, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture<short2, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture<short4, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture<unsigned short, texType, mode> texRef,
-                                                   float x, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture<ushort1, texType, mode> texRef, float x,
-                                            float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture<ushort2, texType, mode> texRef, float x,
-                                            float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture<ushort4, texType, mode> texRef, float x,
-                                            float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture<int, texType, mode> texRef, float x, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture<int1, texType, mode> texRef, float x,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture<int2, texType, mode> texRef, float x,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture<int4, texType, mode> texRef, float x,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture<unsigned int, texType, mode> texRef,
-                                                 float x, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture<uint1, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture<uint2, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture<uint4, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture<float, texType, mode> texRef, float x,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture<float1, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture<float2, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture<float4, texType, mode> texRef, float x,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture<char, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture<char1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture<char2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture<char4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture<unsigned char, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture<uchar1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture<uchar2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture<uchar4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture<short, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture<short1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture<short2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture<short4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture<unsigned short, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture<ushort1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x,
-                                            float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture<ushort2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x,
-                                            float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture<ushort4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x,
-                                            float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture<int, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture<int1, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture<int2, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture<int4, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture<unsigned int, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture<uint1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture<uint2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture<uint4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture<float, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture<float1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture<float2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture<float4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_1D(i, s, x, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture<char, texType, mode> texRef, float x, float dx,
-                                          float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture<char1, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture<char2, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture<char4, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture<unsigned char, texType, mode> texRef,
-                                                   float x, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture<uchar1, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DGrad(texture<uchar2, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture<uchar4, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture<short, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture<short1, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture<short2, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture<short4, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture<unsigned short, texType, mode> texRef,
-                                                    float x, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture<ushort1, texType, mode> texRef, float x,
-                                             float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture<ushort2, texType, mode> texRef, float x,
-                                             float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture<ushort4, texType, mode> texRef, float x,
-                                             float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture<int, texType, mode> texRef, float x, float dx,
-                                         float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture<int1, texType, mode> texRef, float x, float dx,
-                                          float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture<int2, texType, mode> texRef, float x, float dx,
-                                          float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture<int4, texType, mode> texRef, float x, float dx,
-                                          float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture<unsigned int, texType, mode> texRef,
-                                                  float x, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture<uint1, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture<uint2, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture<uint4, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture<float, texType, mode> texRef, float x, float dx,
-                                           float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture<float1, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture<float2, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture<float4, texType, mode> texRef, float x,
-                                            float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture<char, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float dx,
-                                          float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture<char1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture<char2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture<char4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture<unsigned char, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture<uchar1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DGrad(texture<uchar2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture<uchar4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture<short, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture<short1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture<short2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture<short4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture<unsigned short, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture<ushort1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float dx,
-                                             float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture<ushort2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float dx,
-                                             float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture<ushort4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float dx,
-                                             float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture<int, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float dx,
-                                         float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture<int1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float dx,
-                                          float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture<int2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float dx,
-                                          float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture<int4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float dx,
-                                          float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture<unsigned int, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture<uint1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture<uint2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture<uint4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture<float, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float dx,
-                                           float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture<float1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture<float2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture<float4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float dx,
-                                            float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture<char, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture<char1, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture<char2, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture<char4, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture<unsigned char, texType, mode> texRef,
-                                               float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture<uchar1, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture<uchar2, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture<uchar4, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture<short, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture<short1, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture<short2, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture<short4, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture<unsigned short, texType, mode> texRef,
-                                                float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture<ushort1, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture<ushort2, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture<ushort4, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture<int, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture<int1, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture<int2, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture<int4, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture<unsigned int, texType, mode> texRef, float x,
-                                              float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture<uint1, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture<uint2, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture<uint4, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture<char, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture<char1, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture<char2, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture<char4, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture<unsigned char, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture<uchar1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture<uchar2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture<uchar4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture<short, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture<short1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture<short2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture<short4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture<unsigned short, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture<ushort1, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture<ushort2, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture<ushort4, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture<int, texType, mode> texRef,
-                                     hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture<int1, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture<int2, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture<int4, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture<unsigned int, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture<uint1, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture<uint2, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture<uint4, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture<float, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture<float, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture<float1, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture<float1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture<float2, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture<float2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture<float4, texType, mode> texRef, float x, float y) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture<float4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture<char, texType, mode> texRef, float x, float y,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture<char1, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture<char2, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture<char4, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture<unsigned char, texType, mode> texRef,
-                                                  float x, float y, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture<uchar1, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture<uchar2, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture<uchar4, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture<short, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture<short1, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture<short2, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture<short4, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture<unsigned short, texType, mode> texRef,
-                                                   float x, float y, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture<ushort1, texType, mode> texRef, float x,
-                                            float y, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture<ushort2, texType, mode> texRef, float x,
-                                            float y, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture<ushort4, texType, mode> texRef, float x,
-                                            float y, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture<int, texType, mode> texRef, float x, float y,
-                                        float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture<int1, texType, mode> texRef, float x, float y,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture<int2, texType, mode> texRef, float x, float y,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture<int4, texType, mode> texRef, float x, float y,
-                                         float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture<unsigned int, texType, mode> texRef,
-                                                 float x, float y, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture<uint1, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture<uint2, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture<uint4, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture<float, texType, mode> texRef, float x, float y,
-                                          float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture<float1, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture<float2, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture<float4, texType, mode> texRef, float x, float y,
-                                           float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture<char, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture<char1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture<char2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture<char4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture<unsigned char, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture<uchar1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture<uchar2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture<uchar4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture<short, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture<short1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture<short2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture<short4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture<unsigned short, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture<ushort1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture<ushort2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture<ushort4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture<int, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture<int1, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture<int2, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture<int4, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture<unsigned int, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture<uint1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture<uint2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture<uint4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture<float, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture<float1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture<float2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture<float4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture<char, texType, mode> texRef, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture<char1, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture<char2, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture<char4, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture<unsigned char, texType, mode> texRef,
-                                                   float x, float y, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture<uchar1, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture<uchar2, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture<uchar4, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture<short, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture<short1, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture<short2, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture<short4, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture<unsigned short, texType, mode> texRef,
-                                                    float x, float y, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture<ushort1, texType, mode> texRef, float x,
-                                             float y, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture<ushort2, texType, mode> texRef, float x,
-                                             float y, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture<ushort4, texType, mode> texRef, float x,
-                                             float y, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture<int, texType, mode> texRef, float x, float y,
-                                         float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture<int1, texType, mode> texRef, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture<int2, texType, mode> texRef, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture<int4, texType, mode> texRef, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture<unsigned int, texType, mode> texRef,
-                                                  float x, float y, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture<uint1, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture<uint2, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture<uint4, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture<float, texType, mode> texRef, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture<float1, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture<float2, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture<float4, texType, mode> texRef, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture<char, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture<char1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture<char2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture<char4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture<unsigned char, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture<uchar1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture<uchar2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture<uchar4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture<short, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture<short1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture<short2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DGrad(texture<short4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture<unsigned short, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    float y, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture<ushort1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture<ushort2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture<ushort4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture<int, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture<int1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture<int2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture<int4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture<unsigned int, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture<uint1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture<uint2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture<uint4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture<float, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture<float1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture<float2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture<float4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_grad_2D(i, s, float2(x, y).data,
-                                          float2(dx.x, dx.y).data,
-                                          float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture<char, texType, mode> texRef, float x, float y,
-                                      float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture<char1, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture<char2, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture<char4, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture<unsigned char, texType, mode> texRef,
-                                               float x, float y, float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3D(texture<uchar1, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture<uchar2, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture<uchar4, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture<short, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture<short1, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture<short2, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture<short4, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture<unsigned short, texType, mode> texRef,
-                                                float x, float y, float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture<ushort1, texType, mode> texRef, float x, float y,
-                                         float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture<ushort2, texType, mode> texRef, float x, float y,
-                                         float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture<ushort4, texType, mode> texRef, float x, float y,
-                                         float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture<int, texType, mode> texRef, float x, float y,
-                                     float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture<int1, texType, mode> texRef, float x, float y,
-                                      float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture<int2, texType, mode> texRef, float x, float y,
-                                      float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture<int4, texType, mode> texRef, float x, float y,
-                                      float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture<unsigned int, texType, mode> texRef, float x,
-                                              float y, float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture<uint1, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture<uint2, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture<uint4, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture<float, texType, mode> texRef, float x, float y,
-                                       float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture<float1, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture<float2, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture<float4, texType, mode> texRef, float x, float y,
-                                        float z) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture<char, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture<char1, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture<char2, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture<char4, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture<unsigned char, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3D(texture<uchar1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture<uchar2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture<uchar4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture<short, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture<short1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture<short2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture<short4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture<unsigned short, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture<ushort1, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture<ushort2, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture<ushort4, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture<int, texType, mode> texRef,
-                                     hipTextureObject_t textureObject, float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture<int1, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture<int2, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture<int4, texType, mode> texRef,
-                                      hipTextureObject_t textureObject, float x, float y, float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture<unsigned int, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture<uint1, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture<uint2, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture<uint4, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture<float, texType, mode> texRef,
-                                       hipTextureObject_t textureObject, float x, float y,
-                                       float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture<float1, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture<float2, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture<float4, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y,
-                                        float z) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture<char, texType, mode> texRef, float x, float y,
-                                         float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture<char1, texType, mode> texRef, float x, float y,
-                                          float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture<char2, texType, mode> texRef, float x, float y,
-                                          float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture<char4, texType, mode> texRef, float x, float y,
-                                          float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture<unsigned char, texType, mode> texRef,
-                                                  float x, float y, float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture<uchar1, texType, mode> texRef, float x, float y,
-                                           float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture<uchar2, texType, mode> texRef, float x, float y,
-                                           float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture<uchar4, texType, mode> texRef, float x, float y,
-                                           float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture<int, texType, mode> texRef, float x, float y,
-                                        float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture<int1, texType, mode> texRef, float x, float y,
-                                         float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture<int2, texType, mode> texRef, float x, float y,
-                                         float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture<int4, texType, mode> texRef, float x, float y,
-                                         float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture<unsigned int, texType, mode> texRef,
-                                                 float x, float y, float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture<uint1, texType, mode> texRef, float x, float y,
-                                          float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture<uint2, texType, mode> texRef, float x, float y,
-                                          float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture<uint4, texType, mode> texRef, float x, float y,
-                                          float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture<float, texType, mode> texRef, float x, float y,
-                                          float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture<float1, texType, mode> texRef, float x, float y,
-                                           float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture<float2, texType, mode> texRef, float x, float y,
-                                           float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture<float4, texType, mode> texRef, float x, float y,
-                                           float z, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture<char, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture<char1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture<char2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture<char4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture<unsigned char, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture<uchar1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture<uchar2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture<uchar4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture<int, texType, mode> texRef,
-                                        hipTextureObject_t textureObject, float x, float y, float z,
-                                        float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture<int1, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture<int2, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture<int4, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture<unsigned int, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture<uint1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture<uint2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture<uint4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture<float, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture<float1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture<float2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture<float4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data,
-                                         level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DGrad(texture<char, texType, mode> texRef, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture<char1, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture<char2, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture<char4, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture<unsigned char, texType, mode> texRef,
-                                                   float x, float y, float z, float4 dx,
-                                                   float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture<uchar1, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture<uchar2, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture<uchar4, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture<short, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture<short1, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture<short2, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture<short4, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture<unsigned short, texType, mode> texRef,
-                                                    float x, float y, float z, float4 dx,
-                                                    float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture<ushort1, texType, mode> texRef, float x,
-                                             float y, float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture<ushort2, texType, mode> texRef, float x,
-                                             float y, float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture<ushort4, texType, mode> texRef, float x,
-                                             float y, float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture<int, texType, mode> texRef, float x, float y,
-                                         float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture<int1, texType, mode> texRef, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture<int2, texType, mode> texRef, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture<int4, texType, mode> texRef, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture<unsigned int, texType, mode> texRef,
-                                                  float x, float y, float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture<uint1, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture<uint2, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture<uint4, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture<float, texType, mode> texRef, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture<float1, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture<float2, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture<float4, texType, mode> texRef, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex3DGrad(texture<char, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture<char1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture<char2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture<char4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture<unsigned char, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture<uchar1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture<uchar2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture<uchar4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture<short, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture<short1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture<short2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture<short4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture<unsigned short, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    float y, float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture<ushort1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture<ushort2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture<ushort4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture<int, texType, mode> texRef,
-                                         hipTextureObject_t textureObject, float x, float y,
-                                         float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture<int1, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture<int2, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture<int4, texType, mode> texRef,
-                                          hipTextureObject_t textureObject, float x, float y,
-                                          float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture<unsigned int, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture<uint1, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture<uint2, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture<uint4, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture<float, texType, mode> texRef,
-                                           hipTextureObject_t textureObject, float x, float y,
-                                           float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture<float1, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture<float2, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture<float4, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            float z, float4 dx, float4 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data,
-                                    float4(dx.x, dx.y, dx.z, dx.w).data,
-                                    float4(dy.x, dy.y, dy.z, dy.w).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture<char, texType, mode> texRef, float x,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture<char1, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture<char2, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture<char4, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture<unsigned char, texType, mode> texRef,
-                                                      float x, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture<uchar1, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture<uchar2, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture<uchar4, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture<short, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture<short1, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture<short2, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture<short4, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered(
-    texture<unsigned short, texType, mode> texRef, float x, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture<ushort1, texType, mode> texRef, float x,
-                                                int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture<ushort2, texType, mode> texRef, float x,
-                                                int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture<ushort4, texType, mode> texRef, float x,
-                                                int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture<int, texType, mode> texRef, float x,
-                                            int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture<int1, texType, mode> texRef, float x,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture<int2, texType, mode> texRef, float x,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture<int4, texType, mode> texRef, float x,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture<unsigned int, texType, mode> texRef,
-                                                     float x, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture<uint1, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture<uint2, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture<uint4, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture<float, texType, mode> texRef, float x,
-                                              int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture<float1, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture<float2, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture<float4, texType, mode> texRef, float x,
-                                               int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture<char, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture<char1, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture<char2, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture<char4, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture<unsigned char, texType, mode> texRef,
-                                                      hipTextureObject_t textureObject, float x,
-                                                      int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture<uchar1, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture<uchar2, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture<uchar4, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture<short, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture<short1, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture<short2, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture<short4, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered(
-    texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture<ushort1, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture<ushort2, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture<ushort4, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture<int, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture<int1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture<int2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture<int4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture<unsigned int, texType, mode> texRef,
-                                                     hipTextureObject_t textureObject, float x,
-                                                     int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture<uint1, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture<uint2, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture<uint4, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture<float, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture<float1, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture<float2, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture<float4, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture<char, texType, mode> texRef, float x,
-                                                int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture<char1, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture<char2, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture<char4, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod(
-    texture<unsigned char, texType, mode> texRef, float x, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture<uchar1, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture<uchar2, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture<uchar4, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture<short, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture<short1, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture<short2, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture<short4, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod(
-    texture<unsigned short, texType, mode> texRef, float x, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture<ushort1, texType, mode> texRef, float x,
-                                                   int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture<ushort2, texType, mode> texRef, float x,
-                                                   int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture<ushort4, texType, mode> texRef, float x,
-                                                   int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture<int, texType, mode> texRef, float x,
-                                               int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture<int1, texType, mode> texRef, float x,
-                                                int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture<int2, texType, mode> texRef, float x,
-                                                int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture<int4, texType, mode> texRef, float x,
-                                                int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture<unsigned int, texType, mode> texRef,
-                                                        float x, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture<uint1, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture<uint2, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture<uint4, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture<float, texType, mode> texRef, float x,
-                                                 int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture<float1, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture<float2, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture<float4, texType, mode> texRef, float x,
-                                                  int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture<char, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture<char1, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture<char2, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture<char4, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod(
-    texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture<uchar1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture<uchar2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture<uchar4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture<short, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture<short1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture<short2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture<short4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod(
-    texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture<ushort1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture<ushort2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture<ushort4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture<int, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, int layer,
-                                               float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture<int1, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture<int2, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture<int4, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture<unsigned int, texType, mode> texRef,
-                                                        hipTextureObject_t textureObject, float x,
-                                                        int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture<uint1, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture<uint2, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture<uint4, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture<float, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture<float1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture<float2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture<float4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture<char, texType, mode> texRef, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture<char, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture<char1, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture<char1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture<char2, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture<char2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture<char4, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture<char4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad(
-    texture<unsigned char, texType, mode> texRef, float x, int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad(
-    texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture<uchar1, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture<uchar1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture<uchar2, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture<uchar2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture<uchar4, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture<uchar4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture<short, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture<short, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture<short1, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture<short1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture<short2, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture<short2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture<short4, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture<short4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad(
-    texture<unsigned short, texType, mode> texRef, float x, int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad(
-    texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture<ushort1, texType, mode> texRef, float x,
-                                                    int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture<ushort1, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture<ushort2, texType, mode> texRef, float x,
-                                                    int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture<ushort2, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture<ushort4, texType, mode> texRef, float x,
-                                                    int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture<ushort4, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture<int, texType, mode> texRef, float x,
-                                                int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture<int, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x,
-                                                int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture<int1, texType, mode> texRef, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture<int1, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture<int2, texType, mode> texRef, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture<int2, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture<int4, texType, mode> texRef, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture<int4, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x,
-                                                 int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad(
-    texture<unsigned int, texType, mode> texRef, float x, int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad(
-    texture<unsigned int, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture<uint1, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture<uint1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture<uint2, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture<uint2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture<uint4, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture<uint4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture<float, texType, mode> texRef, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture<float, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture<float1, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture<float1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture<float2, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture<float2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture<float4, texType, mode> texRef, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture<float4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   int layer, float dx, float dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dx, dy);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture<char, texType, mode> texRef, float x, float y,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture<char, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture<char1, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture<char1, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture<char2, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture<char2, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture<char4, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture<char4, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture<unsigned char, texType, mode> texRef,
-                                                      float x, float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture<unsigned char, texType, mode> texRef,
-                                                      hipTextureObject_t textureObject, float x,
-                                                      float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture<uchar1, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture<uchar1, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture<uchar2, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture<uchar2, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture<uchar4, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayered(texture<uchar4, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture<short, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture<short, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayered(texture<short1, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayered(texture<short1, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture<short2, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture<short2, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture<short4, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture<short4, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered(
-    texture<unsigned short, texType, mode> texRef, float x, float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered(
-    texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture<ushort1, texType, mode> texRef, float x,
-                                                float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture<ushort1, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture<ushort2, texType, mode> texRef, float x,
-                                                float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture<ushort2, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture<ushort4, texType, mode> texRef, float x,
-                                                float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture<ushort4, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture<int, texType, mode> texRef, float x, float y,
-                                            int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture<int, texType, mode> texRef,
-                                            hipTextureObject_t textureObject, float x, float y,
-                                            int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture<int1, texType, mode> texRef, float x, float y,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture<int1, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture<int2, texType, mode> texRef, float x, float y,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture<int2, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture<int4, texType, mode> texRef, float x, float y,
-                                             int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture<int4, texType, mode> texRef,
-                                             hipTextureObject_t textureObject, float x, float y,
-                                             int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture<unsigned int, texType, mode> texRef,
-                                                     float x, float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture<unsigned int, texType, mode> texRef,
-                                                     hipTextureObject_t textureObject, float x,
-                                                     float y, int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture<uint1, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture<uint1, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture<uint2, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture<uint2, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture<uint4, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture<uint4, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture<float, texType, mode> texRef, float x,
-                                              float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture<float, texType, mode> texRef,
-                                              hipTextureObject_t textureObject, float x, float y,
-                                              int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture<float1, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture<float1, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture<float2, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture<float2, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture<float4, texType, mode> texRef, float x,
-                                               float y, int layer) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayered(texture<float4, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture<char, texType, mode> texRef, float x,
-                                                float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture<char, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture<char1, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture<char1, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture<char2, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture<char2, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture<char4, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture<char4, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod(
-    texture<unsigned char, texType, mode> texRef, float x, float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod(
-    texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture<uchar1, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredLod(texture<uchar1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture<uchar2, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture<uchar2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture<uchar4, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture<uchar4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture<short, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture<short, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture<short1, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture<short1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture<short2, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture<short2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture<short4, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture<short4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod(
-    texture<unsigned short, texType, mode> texRef, float x, float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod(
-    texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture<ushort1, texType, mode> texRef, float x,
-                                                   float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture<ushort1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture<ushort2, texType, mode> texRef, float x,
-                                                   float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture<ushort2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture<ushort4, texType, mode> texRef, float x,
-                                                   float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture<ushort4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture<int, texType, mode> texRef, float x, float y,
-                                               int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture<int, texType, mode> texRef,
-                                               hipTextureObject_t textureObject, float x, float y,
-                                               int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture<int1, texType, mode> texRef, float x,
-                                                float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture<int1, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture<int2, texType, mode> texRef, float x,
-                                                float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture<int2, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture<int4, texType, mode> texRef, float x,
-                                                float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture<int4, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture<unsigned int, texType, mode> texRef,
-                                                        float x, float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture<unsigned int, texType, mode> texRef,
-                                                        hipTextureObject_t textureObject, float x,
-                                                        float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture<uint1, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture<uint1, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture<uint2, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture<uint2, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture<uint4, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture<uint4, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture<float, texType, mode> texRef, float x,
-                                                 float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture<float, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture<float1, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture<float1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture<float2, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture<float2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture<float4, texType, mode> texRef, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture<float4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float level) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f = __ockl_image_sample_lod_2Da(
-        i, s, float4(x, y, layer, 0.0f).data, level);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-////////////////////////////////////////////////////////////
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture<char, texType, mode> texRef, float x,
-                                                 float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture<char, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture<char1, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture<char1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture<char2, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture<char2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture<char4, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture<char4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_CHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad(
-    texture<unsigned char, texType, mode> texRef, float x, float y, int layer, float2 dx,
-    float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad(
-    texture<unsigned char, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture<uchar1, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture<uchar1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture<uchar2, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture<uchar2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture<uchar4, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture<uchar4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UCHAR_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture<short, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture<short, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture<short1, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture<short1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture<short2, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredGrad(texture<short2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture<short4, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture<short4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_SHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad(
-    texture<unsigned short, texType, mode> texRef, float x, float y, int layer, float2 dx,
-    float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad(
-    texture<unsigned short, texType, mode> texRef, hipTextureObject_t textureObject, float x,
-    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture<ushort1, texType, mode> texRef, float x,
-                                                    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture<ushort1, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture<ushort2, texType, mode> texRef, float x,
-                                                    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture<ushort2, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture<ushort4, texType, mode> texRef, float x,
-                                                    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture<ushort4, texType, mode> texRef,
-                                                    hipTextureObject_t textureObject, float x,
-                                                    float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_USHORT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredGrad(texture<int, texType, mode> texRef, float x,
-                                                float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredGrad(texture<int, texType, mode> texRef,
-                                                hipTextureObject_t textureObject, float x, float y,
-                                                int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture<int1, texType, mode> texRef, float x,
-                                                 float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture<int1, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture<int2, texType, mode> texRef, float x,
-                                                 float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture<int2, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture<int4, texType, mode> texRef, float x,
-                                                 float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture<int4, texType, mode> texRef,
-                                                 hipTextureObject_t textureObject, float x, float y,
-                                                 int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_INT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad(
-    texture<unsigned int, texType, mode> texRef, float x, float y, int layer, float2 dx,
-    float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad(
-    texture<unsigned int, texType, mode> texRef, hipTextureObject_t textureObject, float x, float y,
-    int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture<uint1, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture<uint1, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture<uint2, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture<uint2, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture<uint4, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture<uint4, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_UINT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture<float, texType, mode> texRef, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture<float, texType, mode> texRef,
-                                                  hipTextureObject_t textureObject, float x,
-                                                  float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture<float1, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture<float1, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_X;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture<float2, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture<float2, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XY;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture<float4, texType, mode> texRef, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_REF_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-
-template <int texType, enum hipTextureReadMode mode>
-__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture<float4, texType, mode> texRef,
-                                                   hipTextureObject_t textureObject, float x,
-                                                   float y, int layer, float2 dx, float2 dy) {
-    TEXTURE_PARAMETERS_INIT;
-    texel.f =
-        __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data,
-                                     float2(dx.x, dx.y).data,
-                                     float2(dy.x, dy.y).data);
-    TEXTURE_RETURN_FLOAT_XYZW;
-}
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_indirect_functions.h b/third_party/rocm/include/hip/hcc_detail/texture_indirect_functions.h
deleted file mode 100644
index 2fe33f3..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_indirect_functions.h
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#pragma once
-
-#if defined(__cplusplus)
-
-#include <hip/hip_vector_types.h>
-#include <hip/hip_texture_types.h>
-#include <hip/hcc_detail/ockl_image.h>
-
-#include <type_traits>
-
-#define TEXTURE_OBJECT_PARAMETERS_INIT                                                            \
-    unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \
-    unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;
-
-template<typename T>
-struct __hip_is_itex_channel_type
-{
-    static constexpr bool value =
-        std::is_same<T, char>::value ||
-        std::is_same<T, unsigned char>::value ||
-        std::is_same<T, short>::value ||
-        std::is_same<T, unsigned short>::value ||
-        std::is_same<T, int>::value ||
-        std::is_same<T, unsigned int>::value ||
-        std::is_same<T, float>::value;
-};
-
-template<
-    typename T,
-    unsigned int rank>
-struct __hip_is_itex_channel_type<HIP_vector_type<T, rank>>
-{
-    static constexpr bool value =
-        __hip_is_itex_channel_type<T>::value &&
-        ((rank == 1) ||
-         (rank == 2) ||
-         (rank == 4));
-};
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1Dfetch(hipTextureObject_t textureObject, int x)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_load_1Db(i, x);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x)
-{
-    *ptr = tex1Dfetch<T>(textureObject, x);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1D(hipTextureObject_t textureObject, float x)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_1D(i, s, x);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x)
-{
-    *ptr = tex1D<T>(textureObject, x);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2D(hipTextureObject_t textureObject, float x, float y)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y)
-{
-    *ptr = tex2D<T>(textureObject, x, y);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
-{
-    *ptr = tex3D<T>(textureObject, x, y, z);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer)
-{
-    *ptr = tex1DLayered<T>(textureObject, x, layer);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer)
-{
-    *ptr = tex1DLayered<T>(textureObject, x, y, layer);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__  T texCubemap(hipTextureObject_t textureObject, float x, float y, float z)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
-{
-    *ptr = texCubemap<T>(textureObject, x, y, z);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer)
-{
-    *ptr = texCubemapLayered<T>(textureObject, x, y, z, layer);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    switch (comp) {
-    case 1: {
-        auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<T*>(&tmp);
-        break;
-    }
-    case 2: {
-        auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<T*>(&tmp);
-        break;
-    }
-    case 3: {
-        auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<T*>(&tmp);
-        break;
-    }
-    default: {
-        auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data);
-        return *reinterpret_cast<T*>(&tmp);
-        break;
-    }
-    };
-    return {};
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0)
-{
-    *ptr = texCubemapLayered<T>(textureObject, x, y, comp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLod(hipTextureObject_t textureObject, float x, float level)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_lod_1D(i, s, x, level);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level)
-{
-    *ptr = tex1DLod<T>(textureObject, x, level);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float level)
-{
-    *ptr = tex2DLod<T>(textureObject, x, y, level);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
-    *ptr = tex3DLod<T>(textureObject, x, y, z, level);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level)
-{
-    *ptr = tex1DLayeredLod<T>(textureObject, x, layer, level);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__  T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level)
-{
-    *ptr = tex2DLayeredLod<T>(textureObject, x, y, layer, level);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
-{
-    *ptr = texCubemapLod<T>(textureObject, x, y, z, level);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    // TODO missing in device libs.
-    // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
-    // return *reinterpret_cast<T*>(&tmp);
-    return {};
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
-    *ptr = texCubemapGrad<T>(textureObject, x, y, z, dPdx, dPdy);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
-{
-    *ptr = texCubemapLayeredLod<T>(textureObject, x, y, z, layer, level);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
-{
-    *ptr = tex1DGrad<T>(textureObject, x, dPdx, dPdy);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data,  float2(dPdy.x, dPdy.y).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
-{
-    *ptr = tex2DGrad<T>(textureObject, x, y, dPdx, dPdy);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
-{
-    *ptr = tex3DGrad<T>(textureObject, x, y, z, dPdx, dPdy);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
-{
-    *ptr = tex1DLayeredGrad<T>(textureObject, x, layer, dPdx, dPdy);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data);
-    return *reinterpret_cast<T*>(&tmp);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
-{
-    *ptr = tex2DLayeredGrad<T>(textureObject, x, y, layer, dPdx, dPdy);
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__  T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
-{
-    TEXTURE_OBJECT_PARAMETERS_INIT
-    // TODO missing in device libs.
-    // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
-    // return *reinterpret_cast<T*>(&tmp);
-    return {};
-}
-
-template <
-    typename T,
-    typename std::enable_if<__hip_is_itex_channel_type<T>::value>::type* = nullptr>
-static __device__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
-{
-    *ptr = texCubemapLayeredGrad<T>(textureObject, x, y, z, layer, dPdx, dPdy);
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/hcc_detail/texture_types.h b/third_party/rocm/include/hip/hcc_detail/texture_types.h
deleted file mode 100644
index 832b909..0000000
--- a/third_party/rocm/include/hip/hcc_detail/texture_types.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_TYPES_H
-
-#include <hip/hcc_detail/driver_types.h>
-
-#define hipTextureType1D 0x01
-#define hipTextureType2D 0x02
-#define hipTextureType3D 0x03
-#define hipTextureTypeCubemap 0x0C
-#define hipTextureType1DLayered 0xF1
-#define hipTextureType2DLayered 0xF2
-#define hipTextureTypeCubemapLayered 0xFC
-
-/**
- * Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD
- */
-#define HIP_IMAGE_OBJECT_SIZE_DWORD 12
-#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8
-#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD
-#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD)
-
-/**
- * An opaque value that represents a hip texture object
- */
-struct __hip_texture;
-typedef struct __hip_texture* hipTextureObject_t;
-
-/**
- * hip texture address modes
- */
-enum hipTextureAddressMode {
-    hipAddressModeWrap = 0,
-    hipAddressModeClamp = 1,
-    hipAddressModeMirror = 2,
-    hipAddressModeBorder = 3
-};
-
-/**
- * hip texture filter modes
- */
-enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1 };
-
-/**
- * hip texture read modes
- */
-enum hipTextureReadMode { hipReadModeElementType = 0, hipReadModeNormalizedFloat = 1 };
-
-/**
- * hip texture reference
- */
-typedef struct textureReference {
-    int normalized;
-    enum hipTextureReadMode readMode;// used only for driver API's
-    enum hipTextureFilterMode filterMode;
-    enum hipTextureAddressMode addressMode[3];  // Texture address mode for up to 3 dimensions
-    struct hipChannelFormatDesc channelDesc;
-    int sRGB;                    // Perform sRGB->linear conversion during texture read
-    unsigned int maxAnisotropy;  // Limit to the anisotropy ratio
-    enum hipTextureFilterMode mipmapFilterMode;
-    float mipmapLevelBias;
-    float minMipmapLevelClamp;
-    float maxMipmapLevelClamp;
-
-    hipTextureObject_t textureObject;
-    int numChannels;
-    enum hipArray_Format format;
-}textureReference;
-
-/**
- * hip texture descriptor
- */
-typedef struct hipTextureDesc {
-    enum hipTextureAddressMode addressMode[3];  // Texture address mode for up to 3 dimensions
-    enum hipTextureFilterMode filterMode;
-    enum hipTextureReadMode readMode;
-    int sRGB;  // Perform sRGB->linear conversion during texture read
-    float borderColor[4];
-    int normalizedCoords;
-    unsigned int maxAnisotropy;
-    enum hipTextureFilterMode mipmapFilterMode;
-    float mipmapLevelBias;
-    float minMipmapLevelClamp;
-    float maxMipmapLevelClamp;
-}hipTextureDesc;
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_bfloat16.h b/third_party/rocm/include/hip/hip_bfloat16.h
deleted file mode 100644
index ef09cf0..0000000
--- a/third_party/rocm/include/hip/hip_bfloat16.h
+++ /dev/null
@@ -1,280 +0,0 @@
-/**
- * MIT License
- *
- * Copyright 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*!\file
- * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef
- */
-
-#ifndef _HIP_BFLOAT16_H_
-#define _HIP_BFLOAT16_H_
-
-#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
-
-// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only
-// include a minimal definition of hip_bfloat16
-
-#include <stdint.h>
-/*! \brief Struct to represent a 16 bit brain floating point number. */
-typedef struct
-{
-    uint16_t data;
-} hip_bfloat16;
-
-#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
-
-#include <cmath>
-#include <cstddef>
-#include <cstdint>
-#include <hip/hip_runtime.h>
-#include <ostream>
-#include <type_traits>
-
-struct hip_bfloat16
-{
-    uint16_t data;
-
-    enum truncate_t
-    {
-        truncate
-    };
-
-    __host__ __device__ hip_bfloat16() = default;
-
-    // round upper 16 bits of IEEE float to convert to bfloat16
-    explicit __host__ __device__ hip_bfloat16(float f)
-        : data(float_to_bfloat16(f))
-    {
-    }
-
-    explicit __host__ __device__ hip_bfloat16(float f, truncate_t)
-        : data(truncate_float_to_bfloat16(f))
-    {
-    }
-
-    // zero extend lower 16 bits of bfloat16 to convert to IEEE float
-    __host__ __device__ operator float() const
-    {
-        union
-        {
-            uint32_t int32;
-            float    fp32;
-        } u = {uint32_t(data) << 16};
-        return u.fp32;
-    }
-
-    static  __host__ __device__ hip_bfloat16 round_to_bfloat16(float f)
-    {
-        hip_bfloat16 output;
-        output.data = float_to_bfloat16(f);
-        return output;
-    }
-
-    static  __host__ __device__ hip_bfloat16 round_to_bfloat16(float f, truncate_t)
-    {
-        hip_bfloat16 output;
-        output.data = truncate_float_to_bfloat16(f);
-        return output;
-    }
-
-private:
-    static __host__ __device__ uint16_t float_to_bfloat16(float f)
-    {
-        union
-        {
-            float    fp32;
-            uint32_t int32;
-        } u = {f};
-        if(~u.int32 & 0x7f800000)
-        {
-            // When the exponent bits are not all 1s, then the value is zero, normal,
-            // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
-            // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
-            // This causes the bfloat16's mantissa to be incremented by 1 if the 16
-            // least significant bits of the float mantissa are greater than 0x8000,
-            // or if they are equal to 0x8000 and the least significant bit of the
-            // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
-            // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
-            // has the value 0x7f, then incrementing it causes it to become 0x00 and
-            // the exponent is incremented by one, which is the next higher FP value
-            // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
-            // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
-            // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
-            // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
-            // incrementing it causes it to become an exponent of 0xFF and a mantissa
-            // of 0x00, which is Inf, the next higher value to the unrounded value.
-            u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even
-        }
-        else if(u.int32 & 0xffff)
-        {
-            // When all of the exponent bits are 1, the value is Inf or NaN.
-            // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
-            // mantissa bit. Quiet NaN is indicated by the most significant mantissa
-            // bit being 1. Signaling NaN is indicated by the most significant
-            // mantissa bit being 0 but some other bit(s) being 1. If any of the
-            // lower 16 bits of the mantissa are 1, we set the least significant bit
-            // of the bfloat16 mantissa, in order to preserve signaling NaN in case
-            // the bloat16's mantissa bits are all 0.
-            u.int32 |= 0x10000; // Preserve signaling NaN
-        }
-        return uint16_t(u.int32 >> 16);
-    }
-
-    // Truncate instead of rounding, preserving SNaN
-    static __host__ __device__ uint16_t truncate_float_to_bfloat16(float f)
-    {
-        union
-        {
-            float    fp32;
-            uint32_t int32;
-        } u = {f};
-        return uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff));
-    }
-};
-
-typedef struct
-{
-    uint16_t data;
-} hip_bfloat16_public;
-
-static_assert(std::is_standard_layout<hip_bfloat16>{},
-              "hip_bfloat16 is not a standard layout type, and thus is "
-              "incompatible with C.");
-
-static_assert(std::is_trivial<hip_bfloat16>{},
-              "hip_bfloat16 is not a trivial type, and thus is "
-              "incompatible with C.");
-
-static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public)
-                  && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data),
-              "internal hip_bfloat16 does not match public hip_bfloat16");
-
-inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16)
-{
-    return os << float(bf16);
-}
-inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a)
-{
-    return a;
-}
-inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a)
-{
-    a.data ^= 0x8000;
-    return a;
-}
-inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return hip_bfloat16(float(a) + float(b));
-}
-inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return hip_bfloat16(float(a) - float(b));
-}
-inline __host__ __device__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return hip_bfloat16(float(a) * float(b));
-}
-inline __host__ __device__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return hip_bfloat16(float(a) / float(b));
-}
-inline __host__ __device__ bool operator<(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return float(a) < float(b);
-}
-inline __host__ __device__ bool operator==(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return float(a) == float(b);
-}
-inline __host__ __device__ bool operator>(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return b < a;
-}
-inline __host__ __device__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return !(a > b);
-}
-inline __host__ __device__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return !(a == b);
-}
-inline __host__ __device__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b)
-{
-    return !(a < b);
-}
-inline __host__ __device__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b)
-{
-    return a = a + b;
-}
-inline __host__ __device__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b)
-{
-    return a = a - b;
-}
-inline __host__ __device__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b)
-{
-    return a = a * b;
-}
-inline __host__ __device__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b)
-{
-    return a = a / b;
-}
-inline __host__ __device__ hip_bfloat16& operator++(hip_bfloat16& a)
-{
-    return a += hip_bfloat16(1.0f);
-}
-inline __host__ __device__ hip_bfloat16& operator--(hip_bfloat16& a)
-{
-    return a -= hip_bfloat16(1.0f);
-}
-inline __host__ __device__ hip_bfloat16 operator++(hip_bfloat16& a, int)
-{
-    hip_bfloat16 orig = a;
-    ++a;
-    return orig;
-}
-inline __host__ __device__ hip_bfloat16 operator--(hip_bfloat16& a, int)
-{
-    hip_bfloat16 orig = a;
-    --a;
-    return orig;
-}
-
-namespace std
-{
-    constexpr __host__ __device__ bool isinf(hip_bfloat16 a)
-    {
-        return !(~a.data & 0x7f80) && !(a.data & 0x7f);
-    }
-    constexpr __host__ __device__ bool isnan(hip_bfloat16 a)
-    {
-        return !(~a.data & 0x7f80) && +(a.data & 0x7f);
-    }
-    constexpr __host__ __device__ bool iszero(hip_bfloat16 a)
-    {
-        return !(a.data & 0x7fff);
-    }
-}
-
-#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
-
-#endif // _HIP_BFLOAT16_H_
diff --git a/third_party/rocm/include/hip/hip_common.h b/third_party/rocm/include/hip/hip_common.h
deleted file mode 100644
index 79c787b..0000000
--- a/third_party/rocm/include/hip/hip_common.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H
-#define HIP_INCLUDE_HIP_HIP_COMMON_H
-
-// Common code included at start of every hip file.
-// Auto enable __HIP_PLATFORM_HCC__ if compiling with HCC
-// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly
-#if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
-#define __HIP_PLATFORM_HCC__
-#endif  //__HCC__
-
-// Auto enable __HIP_PLATFORM_NVCC__ if compiling with NVCC
-#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__))
-#define __HIP_PLATFORM_NVCC__
-#ifdef __CUDACC__
-#define __HIPCC__
-#endif
-
-#endif  //__NVCC__
-
-// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path
-#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) ||                                  \
-    (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0)
-#define __HIP_DEVICE_COMPILE__ 1
-#endif
-
-#ifdef __GNUC__
-#define HIP_PUBLIC_API              __attribute__ ((visibility ("default")))
-#define HIP_INTERNAL_EXPORTED_API   __attribute__ ((visibility ("default")))
-#else
-#define HIP_PUBLIC_API
-#define HIP_INTERNAL_EXPORTED_API 
-#endif
-
-#if __HIP_DEVICE_COMPILE__ == 0
-// 32-bit Atomics
-#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0)
-#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0)
-#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0)
-#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0)
-#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
-
-// 64-bit Atomics
-#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0)
-#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
-
-// Doubles
-#define __HIP_ARCH_HAS_DOUBLES__ (0)
-
-// Warp cross-lane operations
-#define __HIP_ARCH_HAS_WARP_VOTE__ (0)
-#define __HIP_ARCH_HAS_WARP_BALLOT__ (0)
-#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0)
-#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
-
-// Sync
-#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
-#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
-
-// Misc
-#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
-#define __HIP_ARCH_HAS_3DGRID__ (0)
-#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_complex.h b/third_party/rocm/include/hip/hip_complex.h
deleted file mode 100644
index fb9cad5..0000000
--- a/third_party/rocm/include/hip/hip_complex.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H
-#define HIP_INCLUDE_HIP_HIP_COMPLEX_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_complex.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_complex.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_cooperative_groups.h b/third_party/rocm/include/hip/hip_cooperative_groups.h
deleted file mode 100644
index 41f3637..0000000
--- a/third_party/rocm/include/hip/hip_cooperative_groups.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- *  @file  hip_cooperative_groups.h
- *
- *  @brief Defines new types and device API wrappers for `Cooperative Group`
- *  feature.
- */
-
-#ifndef  HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H
-#define  HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H
-
-#include <hip/hip_version.h>
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#if __cplusplus && defined(__clang__) && defined(__HIP__)
-#include <hip/hcc_detail/hip_cooperative_groups.h>
-#endif
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_cooperative_groups.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif // HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H
diff --git a/third_party/rocm/include/hip/hip_ext.h b/third_party/rocm/include/hip/hip_ext.h
deleted file mode 100644
index ef8f53b..0000000
--- a/third_party/rocm/include/hip/hip_ext.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_EXT_H
-#define HIP_INCLUDE_HIP_HIP_EXT_H
-#include "hip/hip_runtime.h"
-#if defined(__cplusplus)
-#include <tuple>
-#include <type_traits>
-#endif
-/** @addtogroup Module Module Management
- *  @{
- */
-
-/**
- * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
- to kernelparams or extra
- *
- * @param [in[ f     Kernel to launch.
- * @param [in] gridDimX  X grid dimension specified in work-items
- * @param [in] gridDimY  Y grid dimension specified in work-items
- * @param [in] gridDimZ  Z grid dimension specified in work-items
- * @param [in] blockDimX X block dimensions specified in work-items
- * @param [in] blockDimY Y grid dimension specified in work-items
- * @param [in] blockDimZ Z grid dimension specified in work-items
- * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel.  The
- kernel can access this with HIP_DYNAMIC_SHARED.
- * @param [in] stream Stream where the kernel should be dispatched.  May be 0, in which case th
- default stream is used with associated synchronization rules.
- * @param [in] kernelParams
- * @param [in] extra     Pointer to kernel arguments.   These are passed directly to the kernel and
- must be in the memory layout and alignment expected by the kernel.
- * @param [in] startEvent  If non-null, specified event will be updated to track the start time of
- the kernel launch.  The event must be created before calling this API.
- * @param [in] stopEvent   If non-null, specified event will be updated to track the stop time of
- the kernel launch.  The event must be created before calling this API.
- *
- * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
- *
- * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please
- refer to hip_porting_driver_api.md for sample usage.
- * HIP/ROCm actually updates the start event when the associated kernel completes.
- */
-HIP_PUBLIC_API
-hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
-                                    uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
-                                    uint32_t localWorkSizeX, uint32_t localWorkSizeY,
-                                    uint32_t localWorkSizeZ, size_t sharedMemBytes,
-                                    hipStream_t hStream, void** kernelParams, void** extra,
-                                    hipEvent_t startEvent = nullptr,
-                                    hipEvent_t stopEvent = nullptr,
-                                    uint32_t flags = 0);
-
-HIP_PUBLIC_API
-hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
-                                    uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
-                                    uint32_t localWorkSizeX, uint32_t localWorkSizeY,
-                                    uint32_t localWorkSizeZ, size_t sharedMemBytes,
-                                    hipStream_t hStream, void** kernelParams, void** extra,
-                                    hipEvent_t startEvent = nullptr,
-                                    hipEvent_t stopEvent = nullptr)
-                                    __attribute__((deprecated("use hipExtModuleLaunchKernel instead")));
-
-#if defined(__HIP_ROCclr__) && defined(__cplusplus)
-
-extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks,
-                                         dim3 dimBlocks, void** args, size_t sharedMemBytes,
-                                         hipStream_t stream, hipEvent_t startEvent,
-                                         hipEvent_t stopEvent, int flags);
-
-template <typename... Args, typename F = void (*)(Args...)>
-inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-                                  std::uint32_t sharedMemBytes, hipStream_t stream,
-                                  hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
-                                  Args... args) {
-    constexpr size_t count = sizeof...(Args);
-    auto tup_ = std::tuple<Args...>{args...};
-    auto tup = validateArgsCountType(kernel, tup_);
-    void* _Args[count];
-    pArgs<0>(tup, _Args);
-
-    auto k = reinterpret_cast<void*>(kernel);
-    hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent,
-                       stopEvent, (int)flags);
-}
-#elif defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
-//kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp"
-
-namespace hip_impl {
-inline
-__attribute__((visibility("hidden")))
-void hipExtLaunchKernelGGLImpl(
-    std::uintptr_t function_address,
-    const dim3& numBlocks,
-    const dim3& dimBlocks,
-    std::uint32_t sharedMemBytes,
-    hipStream_t stream,
-    hipEvent_t startEvent,
-    hipEvent_t stopEvent,
-    std::uint32_t flags,
-    void** kernarg) {
-
-    const auto& kd = hip_impl::get_program_state()
-        .kernel_descriptor(function_address, target_agent(stream));
-
-    hipExtModuleLaunchKernel(kd, numBlocks.x * dimBlocks.x,
-                             numBlocks.y * dimBlocks.y,
-                             numBlocks.z * dimBlocks.z,
-                             dimBlocks.x, dimBlocks.y, dimBlocks.z,
-                             sharedMemBytes, stream, nullptr, kernarg,
-                             startEvent, stopEvent, flags);
-}
-}  // namespace hip_impl
-
-template <typename... Args, typename F = void (*)(Args...)>
-inline
-void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks,
-                           const dim3& dimBlocks, std::uint32_t sharedMemBytes,
-                           hipStream_t stream, hipEvent_t startEvent,
-                           hipEvent_t stopEvent, std::uint32_t flags,
-                           Args... args) {
-    hip_impl::hip_init();
-    auto kernarg =
-        hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
-    std::size_t kernarg_size = kernarg.size();
-
-    void* config[]{
-        HIP_LAUNCH_PARAM_BUFFER_POINTER,
-        kernarg.data(),
-        HIP_LAUNCH_PARAM_BUFFER_SIZE,
-        &kernarg_size,
-        HIP_LAUNCH_PARAM_END};
-
-    hip_impl::hipExtLaunchKernelGGLImpl(reinterpret_cast<std::uintptr_t>(kernel),
-                                        numBlocks, dimBlocks, sharedMemBytes,
-                                        stream, startEvent, stopEvent, flags,
-                                        &config[0]);
-}
-#endif // !__HIP_ROCclr__ && defined(__cplusplus)
-
-// doxygen end AMD-specific features
-/**
- * @}
- */
-#endif  // #iidef HIP_INCLUDE_HIP_HIP_EXT_H
diff --git a/third_party/rocm/include/hip/hip_fp16.h b/third_party/rocm/include/hip/hip_fp16.h
deleted file mode 100644
index 994ce62..0000000
--- a/third_party/rocm/include/hip/hip_fp16.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_FP16_H
-#define HIP_INCLUDE_HIP_HIP_FP16_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_fp16.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "cuda_fp16.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_hcc.h b/third_party/rocm/include/hip/hip_hcc.h
deleted file mode 100644
index e7e27fc..0000000
--- a/third_party/rocm/include/hip/hip_hcc.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_HCC_H
-#define HIP_INCLUDE_HIP_HIP_HCC_H
-#warning "hip/hip_hcc.h is deprecated, please use hip/hip_ext.h"
-#include "hip/hip_ext.h"
-#endif  // #ifdef HIP_INCLUDE_HIP_HIP_HCC_H
diff --git a/third_party/rocm/include/hip/hip_profile.h b/third_party/rocm/include/hip/hip_profile.h
deleted file mode 100644
index ff18239..0000000
--- a/third_party/rocm/include/hip/hip_profile.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H
-#define HIP_INCLUDE_HIP_HIP_PROFILE_H
-
-#define HIP_SCOPED_MARKER(markerName, group)
-#define HIP_BEGIN_MARKER(markerName, group)
-#define HIP_END_MARKER()
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_runtime.h b/third_party/rocm/include/hip/hip_runtime.h
deleted file mode 100644
index c785f8d..0000000
--- a/third_party/rocm/include/hip/hip_runtime.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-//! HIP = Heterogeneous-compute Interface for Portability
-//!
-//! Define a extremely thin runtime layer that allows source code to be compiled unmodified
-//! through either AMD HCC or NVCC.   Key features tend to be in the spirit
-//! and terminology of CUDA, but with a portable path to other accelerators as well:
-//
-//! Both paths support rich C++ features including classes, templates, lambdas, etc.
-//! Runtime API is C
-//! Memory management is based on pure pointers and resembles malloc/free/copy.
-//
-//! hip_runtime.h     : includes everything in hip_api.h, plus math builtins and kernel launch
-//! macros. hip_runtime_api.h : Defines HIP API.  This is a C header file and does not use any C++
-//! features.
-
-#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H
-#define HIP_INCLUDE_HIP_HIP_RUNTIME_H
-
-#if (__gfx1010__ || __gfx1011__ || __gfx1012__ || __gfx1030__ || __gfx1031__) && __AMDGCN_WAVEFRONT_SIZE == 64
-#error HIP is not supported on GFX10 with wavefront size 64
-#endif
-
-// Some standard header files, these are included by hc.hpp and so want to make them avail on both
-// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
-// on NVCC path:
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#if __cplusplus > 199711L
-#include <thread>
-#endif
-
-#include <hip/hip_version.h>
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_runtime.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_runtime.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-
-#include <hip/hip_runtime_api.h>
-#include <hip/hip_vector_types.h>
-#include <hip/library_types.h>
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_runtime_api.h b/third_party/rocm/include/hip/hip_runtime_api.h
deleted file mode 100644
index ed9a288..0000000
--- a/third_party/rocm/include/hip/hip_runtime_api.h
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
- * @file hip_runtime_api.h
- *
- * @brief Defines the API signatures for HIP runtime.
- * This file can be compiled with a standard compiler.
- */
-
-#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
-#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H
-
-
-#include <string.h>  // for getDeviceProp
-#include <hip/hip_version.h>
-#include <hip/hip_common.h>
-
-enum {
-    HIP_SUCCESS = 0,
-    HIP_ERROR_INVALID_VALUE,
-    HIP_ERROR_NOT_INITIALIZED,
-    HIP_ERROR_LAUNCH_OUT_OF_RESOURCES
-};
-
-typedef struct {
-    // 32-bit Atomics
-    unsigned hasGlobalInt32Atomics : 1;     ///< 32-bit integer atomics for global memory.
-    unsigned hasGlobalFloatAtomicExch : 1;  ///< 32-bit float atomic exch for global memory.
-    unsigned hasSharedInt32Atomics : 1;     ///< 32-bit integer atomics for shared memory.
-    unsigned hasSharedFloatAtomicExch : 1;  ///< 32-bit float atomic exch for shared memory.
-    unsigned hasFloatAtomicAdd : 1;  ///< 32-bit float atomic add in global and shared memory.
-
-    // 64-bit Atomics
-    unsigned hasGlobalInt64Atomics : 1;  ///< 64-bit integer atomics for global memory.
-    unsigned hasSharedInt64Atomics : 1;  ///< 64-bit integer atomics for shared memory.
-
-    // Doubles
-    unsigned hasDoubles : 1;  ///< Double-precision floating point.
-
-    // Warp cross-lane operations
-    unsigned hasWarpVote : 1;     ///< Warp vote instructions (__any, __all).
-    unsigned hasWarpBallot : 1;   ///< Warp ballot instructions (__ballot).
-    unsigned hasWarpShuffle : 1;  ///< Warp shuffle operations. (__shfl_*).
-    unsigned hasFunnelShift : 1;  ///< Funnel two words into one with shift&mask caps.
-
-    // Sync
-    unsigned hasThreadFenceSystem : 1;  ///< __threadfence_system.
-    unsigned hasSyncThreadsExt : 1;     ///< __syncthreads_count, syncthreads_and, syncthreads_or.
-
-    // Misc
-    unsigned hasSurfaceFuncs : 1;        ///< Surface functions.
-    unsigned has3dGrid : 1;              ///< Grid and group dims are 3D (rather than 2D).
-    unsigned hasDynamicParallelism : 1;  ///< Dynamic parallelism.
-} hipDeviceArch_t;
-
-
-//---
-// Common headers for both NVCC and HCC paths:
-
-/**
- * hipDeviceProp
- *
- */
-typedef struct hipDeviceProp_t {
-    char name[256];            ///< Device name.
-    size_t totalGlobalMem;     ///< Size of global memory region (in bytes).
-    size_t sharedMemPerBlock;  ///< Size of shared memory region (in bytes).
-    int regsPerBlock;          ///< Registers per block.
-    int warpSize;              ///< Warp size.
-    int maxThreadsPerBlock;    ///< Max work items per work group or workgroup max size.
-    int maxThreadsDim[3];      ///< Max number of threads in each dimension (XYZ) of a block.
-    int maxGridSize[3];        ///< Max grid dimensions (XYZ).
-    int clockRate;             ///< Max clock frequency of the multiProcessors in khz.
-    int memoryClockRate;       ///< Max global memory clock frequency in khz.
-    int memoryBusWidth;        ///< Global memory bus width in bits.
-    size_t totalConstMem;      ///< Size of shared memory region (in bytes).
-    int major;  ///< Major compute capability.  On HCC, this is an approximation and features may
-                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
-                ///< feature caps.
-    int minor;  ///< Minor compute capability.  On HCC, this is an approximation and features may
-                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
-                ///< feature caps.
-    int multiProcessorCount;          ///< Number of multi-processors (compute units).
-    int l2CacheSize;                  ///< L2 cache size.
-    int maxThreadsPerMultiProcessor;  ///< Maximum resident threads per multi-processor.
-    int computeMode;                  ///< Compute mode.
-    int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*"
-                               ///< instructions.  New for HIP.
-    hipDeviceArch_t arch;      ///< Architectural feature flags.  New for HIP.
-    int concurrentKernels;     ///< Device can possibly execute multiple kernels concurrently.
-    int pciDomainID;           ///< PCI Domain ID
-    int pciBusID;              ///< PCI Bus ID.
-    int pciDeviceID;           ///< PCI Device ID.
-    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per Multiprocessor.
-    int isMultiGpuBoard;                      ///< 1 if device is on a multi-GPU board, 0 if not.
-    int canMapHostMemory;                     ///< Check whether HIP can map host memory
-    int gcnArch;                              ///< DEPRECATED: use gcnArchName instead
-    char gcnArchName[256];                    ///< AMD GCN Arch Name.
-    int integrated;            ///< APU vs dGPU
-    int cooperativeLaunch;            ///< HIP device supports cooperative launch
-    int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
-    int maxTexture1DLinear;    ///< Maximum size for 1D textures bound to linear memory
-    int maxTexture1D;          ///< Maximum number of elements in 1D images
-    int maxTexture2D[2];       ///< Maximum dimensions (width, height) of 2D images, in image elements
-    int maxTexture3D[3];       ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
-    unsigned int* hdpMemFlushCntl;      ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
-    unsigned int* hdpRegFlushCntl;      ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
-    size_t memPitch;                 ///<Maximum pitch in bytes allowed by memory copies
-    size_t textureAlignment;         ///<Alignment requirement for textures
-    size_t texturePitchAlignment;    ///<Pitch alignment requirement for texture references bound to pitched memory
-    int kernelExecTimeoutEnabled;    ///<Run time limit for kernels executed on the device
-    int ECCEnabled;                  ///<Device has ECC support enabled
-    int tccDriver;                   ///< 1:If device is Tesla device using TCC driver, else 0
-    int cooperativeMultiDeviceUnmatchedFunc;        ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched functions
-    int cooperativeMultiDeviceUnmatchedGridDim;     ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched grid dimensions
-    int cooperativeMultiDeviceUnmatchedBlockDim;    ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched block dimensions
-    int cooperativeMultiDeviceUnmatchedSharedMem;   ///< HIP device supports cooperative launch on multiple
-                                                    ///devices with unmatched shared memories
-    int isLargeBar;                  ///< 1: if it is a large PCI bar device, else 0
-    int asicRevision;                ///< Revision of the GPU in this device
-    int managedMemory;               ///< Device supports allocating managed memory on this system
-    int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
-    int concurrentManagedAccess;     ///< Device can coherently access managed memory concurrently with the CPU
-    int pageableMemoryAccess;        ///< Device supports coherently accessing pageable memory
-                                     ///< without calling hipHostRegister on it
-    int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
-} hipDeviceProp_t;
-
-
-/**
- * Memory type (for pointer attributes)
- */
-typedef enum hipMemoryType {
-    hipMemoryTypeHost,    ///< Memory is physically located on host
-    hipMemoryTypeDevice,  ///< Memory is physically located on device. (see deviceId for specific
-                          ///< device)
-    hipMemoryTypeArray,  ///< Array memory, physically located on device. (see deviceId for specific
-                         ///< device)
-    hipMemoryTypeUnified  ///< Not used currently
-}hipMemoryType;
-
-
-/**
- * Pointer attributes
- */
-typedef struct hipPointerAttribute_t {
-    enum hipMemoryType memoryType;
-    int device;
-    void* devicePointer;
-    void* hostPointer;
-    int isManaged;
-    unsigned allocationFlags; /* flags specified when memory was allocated*/
-    /* peers? */
-} hipPointerAttribute_t;
-
-
-// hack to get these to show up in Doxygen:
-/**
- *     @defgroup GlobalDefs Global enum and defines
- *     @{
- *
- */
-
-// Ignoring error-code return values from hip APIs is discouraged. On C++17,
-// we can make that yield a warning
-#if __cplusplus >= 201703L
-#define __HIP_NODISCARD [[nodiscard]]
-#else
-#define __HIP_NODISCARD
-#endif
-
-/*
- * @brief hipError_t
- * @enum
- * @ingroup Enumerations
- */
-// Developer note - when updating these, update the hipErrorName and hipErrorString functions in
-// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path.
-
-typedef enum __HIP_NODISCARD hipError_t {
-    hipSuccess = 0,  ///< Successful completion.
-    hipErrorInvalidValue = 1,  ///< One or more of the parameters passed to the API call is NULL
-                               ///< or not in an acceptable range.
-    hipErrorOutOfMemory = 2,
-    // Deprecated
-    hipErrorMemoryAllocation = 2,  ///< Memory allocation error.
-    hipErrorNotInitialized = 3,
-    // Deprecated
-    hipErrorInitializationError = 3,
-    hipErrorDeinitialized = 4,
-    hipErrorProfilerDisabled = 5,
-    hipErrorProfilerNotInitialized = 6,
-    hipErrorProfilerAlreadyStarted = 7,
-    hipErrorProfilerAlreadyStopped = 8,
-    hipErrorInvalidConfiguration = 9,
-    hipErrorInvalidSymbol = 13,
-    hipErrorInvalidDevicePointer = 17,  ///< Invalid Device Pointer
-    hipErrorInvalidMemcpyDirection = 21,  ///< Invalid memory copy direction
-    hipErrorInsufficientDriver = 35,
-    hipErrorMissingConfiguration = 52,
-    hipErrorPriorLaunchFailure = 53,
-    hipErrorInvalidDeviceFunction = 98,
-    hipErrorNoDevice = 100,  ///< Call to hipGetDeviceCount returned 0 devices
-    hipErrorInvalidDevice = 101,  ///< DeviceID must be in range 0...#compute-devices.
-    hipErrorInvalidImage = 200,
-    hipErrorInvalidContext = 201,  ///< Produced when input context is invalid.
-    hipErrorContextAlreadyCurrent = 202,
-    hipErrorMapFailed = 205,
-    // Deprecated
-    hipErrorMapBufferObjectFailed = 205,  ///< Produced when the IPC memory attach failed from ROCr.
-    hipErrorUnmapFailed = 206,
-    hipErrorArrayIsMapped = 207,
-    hipErrorAlreadyMapped = 208,
-    hipErrorNoBinaryForGpu = 209,
-    hipErrorAlreadyAcquired = 210,
-    hipErrorNotMapped = 211,
-    hipErrorNotMappedAsArray = 212,
-    hipErrorNotMappedAsPointer = 213,
-    hipErrorECCNotCorrectable = 214,
-    hipErrorUnsupportedLimit = 215,
-    hipErrorContextAlreadyInUse = 216,
-    hipErrorPeerAccessUnsupported = 217,
-    hipErrorInvalidKernelFile = 218,  ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
-    hipErrorInvalidGraphicsContext = 219,
-    hipErrorInvalidSource = 300,
-    hipErrorFileNotFound = 301,
-    hipErrorSharedObjectSymbolNotFound = 302,
-    hipErrorSharedObjectInitFailed = 303,
-    hipErrorOperatingSystem = 304,
-    hipErrorInvalidHandle = 400,
-    // Deprecated
-    hipErrorInvalidResourceHandle = 400,  ///< Resource handle (hipEvent_t or hipStream_t) invalid.
-    hipErrorNotFound = 500,
-    hipErrorNotReady = 600,  ///< Indicates that asynchronous operations enqueued earlier are not
-                             ///< ready.  This is not actually an error, but is used to distinguish
-                             ///< from hipSuccess (which indicates completion).  APIs that return
-                             ///< this error include hipEventQuery and hipStreamQuery.
-    hipErrorIllegalAddress = 700,
-    hipErrorLaunchOutOfResources = 701,  ///< Out of resources error.
-    hipErrorLaunchTimeOut = 702,
-    hipErrorPeerAccessAlreadyEnabled =
-        704,  ///< Peer access was already enabled from the current device.
-    hipErrorPeerAccessNotEnabled =
-        705,  ///< Peer access was never enabled from the current device.
-    hipErrorSetOnActiveProcess = 708,
-    hipErrorAssert = 710,  ///< Produced when the kernel calls assert.
-    hipErrorHostMemoryAlreadyRegistered =
-        712,  ///< Produced when trying to lock a page-locked memory.
-    hipErrorHostMemoryNotRegistered =
-        713,  ///< Produced when trying to unlock a non-page-locked memory.
-    hipErrorLaunchFailure =
-        719,  ///< An exception occurred on the device while executing a kernel.
-    hipErrorCooperativeLaunchTooLarge =
-        720,  ///< This error indicates that the number of blocks launched per grid for a kernel
-              ///< that was launched via cooperative launch APIs exceeds the maximum number of
-              ///< allowed blocks for the current device
-    hipErrorNotSupported = 801,  ///< Produced when the hip API is not supported/implemented
-    hipErrorUnknown = 999,  //< Unknown error.
-    // HSA Runtime Error Codes start here.
-    hipErrorRuntimeMemory = 1052,  ///< HSA runtime memory call returned error.  Typically not seen
-                                   ///< in production systems.
-    hipErrorRuntimeOther = 1053,  ///< HSA runtime call other than memory returned error.  Typically
-                                  ///< not seen in production systems.
-    hipErrorTbd  ///< Marker that more error codes are needed.
-} hipError_t;
-
-#undef __HIP_NODISCARD
-
-/*
- * @brief hipDeviceAttribute_t
- * @enum
- * @ingroup Enumerations
- */
-typedef enum hipDeviceAttribute_t {
-    hipDeviceAttributeMaxThreadsPerBlock,       ///< Maximum number of threads per block.
-    hipDeviceAttributeMaxBlockDimX,             ///< Maximum x-dimension of a block.
-    hipDeviceAttributeMaxBlockDimY,             ///< Maximum y-dimension of a block.
-    hipDeviceAttributeMaxBlockDimZ,             ///< Maximum z-dimension of a block.
-    hipDeviceAttributeMaxGridDimX,              ///< Maximum x-dimension of a grid.
-    hipDeviceAttributeMaxGridDimY,              ///< Maximum y-dimension of a grid.
-    hipDeviceAttributeMaxGridDimZ,              ///< Maximum z-dimension of a grid.
-    hipDeviceAttributeMaxSharedMemoryPerBlock,  ///< Maximum shared memory available per block in
-                                                ///< bytes.
-    hipDeviceAttributeTotalConstantMemory,      ///< Constant memory size in bytes.
-    hipDeviceAttributeWarpSize,                 ///< Warp size in threads.
-    hipDeviceAttributeMaxRegistersPerBlock,  ///< Maximum number of 32-bit registers available to a
-                                             ///< thread block. This number is shared by all thread
-                                             ///< blocks simultaneously resident on a
-                                             ///< multiprocessor.
-    hipDeviceAttributeClockRate,             ///< Peak clock frequency in kilohertz.
-    hipDeviceAttributeMemoryClockRate,       ///< Peak memory clock frequency in kilohertz.
-    hipDeviceAttributeMemoryBusWidth,        ///< Global memory bus width in bits.
-    hipDeviceAttributeMultiprocessorCount,   ///< Number of multiprocessors on the device.
-    hipDeviceAttributeComputeMode,           ///< Compute mode that device is currently in.
-    hipDeviceAttributeL2CacheSize,  ///< Size of L2 cache in bytes. 0 if the device doesn't have L2
-                                    ///< cache.
-    hipDeviceAttributeMaxThreadsPerMultiProcessor,  ///< Maximum resident threads per
-                                                    ///< multiprocessor.
-    hipDeviceAttributeComputeCapabilityMajor,       ///< Major compute capability version number.
-    hipDeviceAttributeComputeCapabilityMinor,       ///< Minor compute capability version number.
-    hipDeviceAttributeConcurrentKernels,  ///< Device can possibly execute multiple kernels
-                                          ///< concurrently.
-    hipDeviceAttributePciBusId,           ///< PCI Bus ID.
-    hipDeviceAttributePciDeviceId,        ///< PCI Device ID.
-    hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,  ///< Maximum Shared Memory Per
-                                                         ///< Multiprocessor.
-    hipDeviceAttributeIsMultiGpuBoard,                   ///< Multiple GPU devices.
-    hipDeviceAttributeIntegrated,                        ///< iGPU
-    hipDeviceAttributeCooperativeLaunch,                 ///< Support cooperative launch
-    hipDeviceAttributeCooperativeMultiDeviceLaunch,      ///< Support cooperative launch on multiple devices
-    hipDeviceAttributeMaxTexture1DWidth,    ///< Maximum number of elements in 1D images
-    hipDeviceAttributeMaxTexture2DWidth,    ///< Maximum dimension width of 2D images in image elements
-    hipDeviceAttributeMaxTexture2DHeight,   ///< Maximum dimension height of 2D images in image elements
-    hipDeviceAttributeMaxTexture3DWidth,    ///< Maximum dimension width of 3D images in image elements
-    hipDeviceAttributeMaxTexture3DHeight,   ///< Maximum dimensions height of 3D images in image elements
-    hipDeviceAttributeMaxTexture3DDepth,    ///< Maximum dimensions depth of 3D images in image elements
-
-    hipDeviceAttributeHdpMemFlushCntl,      ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
-    hipDeviceAttributeHdpRegFlushCntl,      ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
-
-    hipDeviceAttributeMaxPitch,             ///< Maximum pitch in bytes allowed by memory copies
-    hipDeviceAttributeTextureAlignment,     ///<Alignment requirement for textures
-    hipDeviceAttributeTexturePitchAlignment, ///<Pitch alignment requirement for 2D texture references bound to pitched memory;
-    hipDeviceAttributeKernelExecTimeout,    ///<Run time limit for kernels executed on the device
-    hipDeviceAttributeCanMapHostMemory,     ///<Device can map host memory into device address space
-    hipDeviceAttributeEccEnabled,           ///<Device has ECC support enabled
-
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,        ///< Supports cooperative launch on multiple
-                                                                  ///devices with unmatched functions
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,     ///< Supports cooperative launch on multiple
-                                                                  ///devices with unmatched grid dimensions
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,    ///< Supports cooperative launch on multiple
-                                                                  ///devices with unmatched block dimensions
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem,   ///< Supports cooperative launch on multiple
-                                                                  ///devices with unmatched shared memories
-    hipDeviceAttributeAsicRevision,         ///< Revision of the GPU in this device
-    hipDeviceAttributeManagedMemory,        ///< Device supports allocating managed memory on this system
-    hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
-                                                      /// the device without migration
-    hipDeviceAttributeConcurrentManagedAccess,  ///< Device can coherently access managed memory
-                                                /// concurrently with the CPU
-    hipDeviceAttributePageableMemoryAccess,     ///< Device supports coherently accessing pageable memory
-                                                /// without calling hipHostRegister on it
-    hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via
-                                                              /// the host's page tables
-} hipDeviceAttribute_t;
-
-enum hipComputeMode {
-    hipComputeModeDefault = 0,
-    hipComputeModeExclusive = 1,
-    hipComputeModeProhibited = 2,
-    hipComputeModeExclusiveProcess = 3
-};
-
-/**
- *     @}
- */
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include "hip/hcc_detail/hip_runtime_api.h"
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "hip/nvcc_detail/hip_runtime_api.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-
-/**
- * @brief: C++ wrapper for hipMalloc
- *
- * Perform automatic type conversion to eliminate need for excessive typecasting (ie void**)
- *
- * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these
- * wrappers. It is useful for applications which need to obtain decltypes of
- * HIP runtime APIs.
- *
- * @see hipMalloc
- */
-#if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__)
-template <class T>
-static inline hipError_t hipMalloc(T** devPtr, size_t size) {
-    return hipMalloc((void**)devPtr, size);
-}
-
-// Provide an override to automatically typecast the pointer type from void**, and also provide a
-// default for the flags.
-template <class T>
-static inline hipError_t hipHostMalloc(T** ptr, size_t size,
-                                       unsigned int flags = hipHostMallocDefault) {
-    return hipHostMalloc((void**)ptr, size, flags);
-}
-
-template <class T>
-static inline hipError_t hipMallocManaged(T** devPtr, size_t size,
-                                       unsigned int flags = hipMemAttachGlobal) {
-    return hipMallocManaged((void**)devPtr, size, flags);
-}
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_texture_types.h b/third_party/rocm/include/hip/hip_texture_types.h
deleted file mode 100644
index a7feab0..0000000
--- a/third_party/rocm/include/hip/hip_texture_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/hip_texture_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <hip/nvcc_detail/hip_texture_types.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_vector_types.h b/third_party/rocm/include/hip/hip_vector_types.h
deleted file mode 100644
index c1a0373..0000000
--- a/third_party/rocm/include/hip/hip_vector_types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-//! hip_vector_types.h : Defines the HIP vector types.
-
-#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H
-#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H
-
-#include <hip/hip_common.h>
-
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#if __cplusplus
-#include <hip/hcc_detail/hip_vector_types.h>
-#endif
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include <vector_types.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/hip_version.h b/third_party/rocm/include/hip/hip_version.h
deleted file mode 100644
index 2fdb247..0000000
--- a/third_party/rocm/include/hip/hip_version.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Auto-generated by cmake
-
-#ifndef HIP_VERSION_H
-#define HIP_VERSION_H
-
-#define HIP_VERSION_MAJOR 4
-#define HIP_VERSION_MINOR 1
-#define HIP_VERSION_PATCH 21114
-#define HIP_VERSION       (HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR)
-
-#define __HIP_HAS_GET_PCH 1
-
-#endif
-
diff --git a/third_party/rocm/include/hip/hiprtc.h b/third_party/rocm/include/hip/hiprtc.h
deleted file mode 100644
index 22d78d2..0000000
--- a/third_party/rocm/include/hip/hiprtc.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-#pragma once
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-    #include <hip/hcc_detail/hiprtc.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-    #include <hip/nvcc_detail/nvrtc.h>
-#else
-    #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
\ No newline at end of file
diff --git a/third_party/rocm/include/hip/library_types.h b/third_party/rocm/include/hip/library_types.h
deleted file mode 100644
index 4a988df..0000000
--- a/third_party/rocm/include/hip/library_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H
-#define HIP_INCLUDE_HIP_LIBRARY_TYPES_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/library_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "library_types.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/math_functions.h b/third_party/rocm/include/hip/math_functions.h
deleted file mode 100644
index 2dfec45..0000000
--- a/third_party/rocm/include/hip/math_functions.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
-#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
-
-// Some standard header files, these are included by hc.hpp and so want to make them avail on both
-// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
-// on NVCC path:
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/math_functions.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-//#include <hip/nvcc_detail/math_functions.h>
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/channel_descriptor.h b/third_party/rocm/include/hip/nvcc_detail/channel_descriptor.h
deleted file mode 100644
index c3e9dc1..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/channel_descriptor.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_CHANNEL_DESCRIPTOR_H
-
-#include "channel_descriptor.h"
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_complex.h b/third_party/rocm/include/hip/nvcc_detail/hip_complex.h
deleted file mode 100644
index d0e45d2..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_complex.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COMPLEX_H
-
-#include "cuComplex.h"
-
-typedef cuFloatComplex hipFloatComplex;
-
-__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return cuCrealf(z); }
-
-__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return cuCimagf(z); }
-
-__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
-    return make_cuFloatComplex(a, b);
-}
-
-__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { return cuConjf(z); }
-
-__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) {
-    return cuCabsf(z) * cuCabsf(z);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
-    return cuCaddf(p, q);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
-    return cuCsubf(p, q);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
-    return cuCmulf(p, q);
-}
-
-__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
-    return cuCdivf(p, q);
-}
-
-__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return cuCabsf(z); }
-
-typedef cuDoubleComplex hipDoubleComplex;
-
-__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return cuCreal(z); }
-
-__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return cuCimag(z); }
-
-__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
-    return make_cuDoubleComplex(a, b);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { return cuConj(z); }
-
-__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) {
-    return cuCabs(z) * cuCabs(z);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
-    return cuCadd(p, q);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
-    return cuCsub(p, q);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
-    return cuCmul(p, q);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
-    return cuCdiv(p, q);
-}
-
-__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return cuCabs(z); }
-
-typedef cuFloatComplex hipComplex;
-
-__device__ __host__ static inline hipComplex make_Complex(float x, float y) {
-    return make_cuComplex(x, y);
-}
-
-__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
-    return cuComplexDoubleToFloat(z);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
-    return cuComplexFloatToDouble(z);
-}
-
-__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
-    return cuCfmaf(p, q, r);
-}
-
-__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
-                                                           hipDoubleComplex r) {
-    return cuCfma(p, q, r);
-}
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_cooperative_groups.h b/third_party/rocm/include/hip/nvcc_detail/hip_cooperative_groups.h
deleted file mode 100644
index 113e600..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_cooperative_groups.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
-
-// Include CUDA headers
-#include <cuda_runtime.h>
-#include <cooperative_groups.h>
-
-// Include HIP wrapper headers around CUDA
-#include <hip/hip_runtime.h>
-#include <hip/hip_runtime_api.h>
-
-#endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_COOPERATIVE_GROUPS_H
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_runtime.h b/third_party/rocm/include/hip/nvcc_detail/hip_runtime.h
deleted file mode 100644
index e7c3eaf..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_runtime.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_H
-
-#include <cuda_runtime.h>
-
-#include <hip/hip_runtime_api.h>
-
-#define HIP_KERNEL_NAME(...) __VA_ARGS__
-
-typedef int hipLaunchParm;
-
-#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...)  \
-    do {                                                                                           \
-        kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__);                 \
-    } while (0)
-
-#define hipLaunchKernelGGL(kernelName, ...)  hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
-
-#define hipReadModeElementType cudaReadModeElementType
-
-#ifdef __CUDA_ARCH__
-
-
-// 32-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (__CUDA_ARCH__ >= 110)
-#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110)
-#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120)
-#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120)
-#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200)
-
-// 64-bit Atomics:
-#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (__CUDA_ARCH__ >= 120)
-
-// Doubles
-#define __HIP_ARCH_HAS_DOUBLES__ (__CUDA_ARCH__ >= 120)
-
-// warp cross-lane operations:
-#define __HIP_ARCH_HAS_WARP_VOTE__ (__CUDA_ARCH__ >= 120)
-#define __HIP_ARCH_HAS_WARP_BALLOT__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (__CUDA_ARCH__ >= 300)
-#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (__CUDA_ARCH__ >= 350)
-
-// sync
-#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (__CUDA_ARCH__ >= 200)
-
-// misc
-#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_3DGRID__ (__CUDA_ARCH__ >= 200)
-#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (__CUDA_ARCH__ >= 350)
-
-#endif
-
-#ifdef __CUDACC__
-
-
-#define hipThreadIdx_x threadIdx.x
-#define hipThreadIdx_y threadIdx.y
-#define hipThreadIdx_z threadIdx.z
-
-#define hipBlockIdx_x blockIdx.x
-#define hipBlockIdx_y blockIdx.y
-#define hipBlockIdx_z blockIdx.z
-
-#define hipBlockDim_x blockDim.x
-#define hipBlockDim_y blockDim.y
-#define hipBlockDim_z blockDim.z
-
-#define hipGridDim_x gridDim.x
-#define hipGridDim_y gridDim.y
-#define hipGridDim_z gridDim.z
-
-#define HIP_SYMBOL(X) &X
-
-/**
- * extern __shared__
- */
-
-#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];
-
-#define HIP_DYNAMIC_SHARED_ATTRIBUTE
-
-#ifdef __HIP_DEVICE_COMPILE__
-#define abort_()                                                                                    \
-    { asm("trap;"); }
-#undef assert
-#define assert(COND)                                                                               \
-    {                                                                                              \
-        if (!COND) {                                                                               \
-            abort_();                                                                               \
-        }                                                                                          \
-    }
-#endif
-
-#define __clock() clock()
-#define __clock64() clock64()
-
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_runtime_api.h b/third_party/rocm/include/hip/nvcc_detail/hip_runtime_api.h
deleted file mode 100644
index 257d795..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_runtime_api.h
+++ /dev/null
@@ -1,2045 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
-
-#include <cuda_runtime_api.h>
-#include <cuda.h>
-#include <cuda_profiler_api.h>
-#include <cuda_fp16.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __cplusplus
-#define __dparm(x) = x
-#else
-#define __dparm(x)
-#endif
-
-// Add Deprecated Support for CUDA Mapped HIP APIs
-#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED)
-#define __HIP_DEPRECATED
-#elif defined(_MSC_VER)
-#define __HIP_DEPRECATED __declspec(deprecated)
-#elif defined(__GNUC__)
-#define __HIP_DEPRECATED __attribute__((deprecated))
-#else
-#define __HIP_DEPRECATED
-#endif
-
-
-// TODO -move to include/hip_runtime_api.h as a common implementation.
-/**
- * Memory copy types
- *
- */
-typedef enum hipMemcpyKind {
-    hipMemcpyHostToHost,
-    hipMemcpyHostToDevice,
-    hipMemcpyDeviceToHost,
-    hipMemcpyDeviceToDevice,
-    hipMemcpyDefault
-} hipMemcpyKind;
-
-// hipDataType
-#define hipDataType cudaDataType
-#define HIP_R_16F CUDA_R_16F
-#define HIP_R_32F CUDA_R_32F
-#define HIP_R_64F CUDA_R_64F
-#define HIP_C_16F CUDA_C_16F
-#define HIP_C_32F CUDA_C_32F
-#define HIP_C_64F CUDA_C_64F
-
-// hipLibraryPropertyType
-#define hipLibraryPropertyType libraryPropertyType
-#define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
-#define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
-#define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
-
-#define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR
-
-//hipArray_Format
-#define HIP_AD_FORMAT_UNSIGNED_INT8   CU_AD_FORMAT_UNSIGNED_INT8
-#define HIP_AD_FORMAT_UNSIGNED_INT16  CU_AD_FORMAT_UNSIGNED_INT16
-#define HIP_AD_FORMAT_UNSIGNED_INT32  CU_AD_FORMAT_UNSIGNED_INT32
-#define HIP_AD_FORMAT_SIGNED_INT8     CU_AD_FORMAT_SIGNED_INT8
-#define HIP_AD_FORMAT_SIGNED_INT16    CU_AD_FORMAT_SIGNED_INT16
-#define HIP_AD_FORMAT_SIGNED_INT32    CU_AD_FORMAT_SIGNED_INT32
-#define HIP_AD_FORMAT_HALF            CU_AD_FORMAT_HALF
-#define HIP_AD_FORMAT_FLOAT           CU_AD_FORMAT_FLOAT
-
-// hipArray_Format
-#define hipArray_Format CUarray_format
-
-inline static CUarray_format hipArray_FormatToCUarray_format(
-    hipArray_Format format) {
-    switch (format) {
-        case HIP_AD_FORMAT_UNSIGNED_INT8:
-            return CU_AD_FORMAT_UNSIGNED_INT8;
-        case HIP_AD_FORMAT_UNSIGNED_INT16:
-            return CU_AD_FORMAT_UNSIGNED_INT16;
-        case HIP_AD_FORMAT_UNSIGNED_INT32:
-            return CU_AD_FORMAT_UNSIGNED_INT32;
-        case HIP_AD_FORMAT_SIGNED_INT8:
-            return CU_AD_FORMAT_SIGNED_INT8;
-        case HIP_AD_FORMAT_SIGNED_INT16:
-            return CU_AD_FORMAT_SIGNED_INT16;
-        case HIP_AD_FORMAT_SIGNED_INT32:
-            return CU_AD_FORMAT_SIGNED_INT32;
-        case HIP_AD_FORMAT_HALF:
-            return CU_AD_FORMAT_HALF;
-        case HIP_AD_FORMAT_FLOAT:
-            return CU_AD_FORMAT_FLOAT;
-        default:
-            return CU_AD_FORMAT_UNSIGNED_INT8;
-    }
-}
-
-#define HIP_TR_ADDRESS_MODE_WRAP   CU_TR_ADDRESS_MODE_WRAP
-#define HIP_TR_ADDRESS_MODE_CLAMP  CU_TR_ADDRESS_MODE_CLAMP
-#define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR
-#define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER
-
-// hipAddress_mode
-#define hipAddress_mode CUaddress_mode
-
-inline static CUaddress_mode hipAddress_modeToCUaddress_mode(
-    hipAddress_mode mode) {
-    switch (mode) {
-        case HIP_TR_ADDRESS_MODE_WRAP:
-            return CU_TR_ADDRESS_MODE_WRAP;
-        case HIP_TR_ADDRESS_MODE_CLAMP:
-            return CU_TR_ADDRESS_MODE_CLAMP;
-        case HIP_TR_ADDRESS_MODE_MIRROR:
-            return CU_TR_ADDRESS_MODE_MIRROR;
-        case HIP_TR_ADDRESS_MODE_BORDER:
-            return CU_TR_ADDRESS_MODE_BORDER;
-        default:
-            return CU_TR_ADDRESS_MODE_WRAP;
-    }
-}
-
-#define HIP_TR_FILTER_MODE_POINT   CU_TR_FILTER_MODE_POINT
-#define HIP_TR_FILTER_MODE_LINEAR  CU_TR_FILTER_MODE_LINEAR
-
-// hipFilter_mode
-#define hipFilter_mode CUfilter_mode
-
-inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode(
-    hipFilter_mode mode) {
-    switch (mode) {
-        case HIP_TR_FILTER_MODE_POINT:
-            return CU_TR_FILTER_MODE_POINT;
-        case HIP_TR_FILTER_MODE_LINEAR:
-            return CU_TR_FILTER_MODE_LINEAR;
-        default:
-            return CU_TR_FILTER_MODE_POINT;
-    }
-}
-
-//hipResourcetype
-#define HIP_RESOURCE_TYPE_ARRAY            CU_RESOURCE_TYPE_ARRAY
-#define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY  CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
-#define HIP_RESOURCE_TYPE_LINEAR           CU_RESOURCE_TYPE_LINEAR
-#define HIP_RESOURCE_TYPE_PITCH2D          CU_RESOURCE_TYPE_PITCH2D
-
-// hipResourcetype
-#define hipResourcetype CUresourcetype
-
-inline static CUresourcetype hipResourcetype_enumToCUresourcetype(
-    hipResourcetype resType) {
-    switch (resType) {
-        case HIP_RESOURCE_TYPE_ARRAY:
-            return CU_RESOURCE_TYPE_ARRAY;
-        case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY:
-            return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
-        case HIP_RESOURCE_TYPE_LINEAR:
-            return CU_RESOURCE_TYPE_LINEAR;
-        case HIP_RESOURCE_TYPE_PITCH2D:
-            return CU_RESOURCE_TYPE_PITCH2D;
-        default:
-            return CU_RESOURCE_TYPE_ARRAY;
-    }
-}
-
-#define hipTexRef CUtexref
-#define hiparray CUarray
-
-// hipTextureAddressMode
-typedef enum cudaTextureAddressMode hipTextureAddressMode;
-#define hipAddressModeWrap cudaAddressModeWrap
-#define hipAddressModeClamp cudaAddressModeClamp
-#define hipAddressModeMirror cudaAddressModeMirror
-#define hipAddressModeBorder cudaAddressModeBorder
-
-// hipTextureFilterMode
-typedef enum cudaTextureFilterMode hipTextureFilterMode;
-#define hipFilterModePoint cudaFilterModePoint
-#define hipFilterModeLinear cudaFilterModeLinear
-
-// hipTextureReadMode
-typedef enum cudaTextureReadMode hipTextureReadMode;
-#define hipReadModeElementType cudaReadModeElementType
-#define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat
-
-// hipChannelFormatKind
-typedef enum cudaChannelFormatKind hipChannelFormatKind;
-#define hipChannelFormatKindSigned      cudaChannelFormatKindSigned
-#define hipChannelFormatKindUnsigned    cudaChannelFormatKindUnsigned
-#define hipChannelFormatKindFloat       cudaChannelFormatKindFloat
-#define hipChannelFormatKindNone        cudaChannelFormatKindNone
-
-#define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode
-#define hipBoundaryModeZero cudaBoundaryModeZero
-#define hipBoundaryModeTrap cudaBoundaryModeTrap
-#define hipBoundaryModeClamp cudaBoundaryModeClamp
-
-// hipFuncCache
-#define hipFuncCachePreferNone cudaFuncCachePreferNone
-#define hipFuncCachePreferShared cudaFuncCachePreferShared
-#define hipFuncCachePreferL1 cudaFuncCachePreferL1
-#define hipFuncCachePreferEqual cudaFuncCachePreferEqual
-
-// hipResourceType
-#define hipResourceType cudaResourceType
-#define hipResourceTypeArray cudaResourceTypeArray
-#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray
-#define hipResourceTypeLinear cudaResourceTypeLinear
-#define hipResourceTypePitch2D cudaResourceTypePitch2D
-//
-// hipErrorNoDevice.
-
-
-//! Flags that can be used with hipEventCreateWithFlags:
-#define hipEventDefault cudaEventDefault
-#define hipEventBlockingSync cudaEventBlockingSync
-#define hipEventDisableTiming cudaEventDisableTiming
-#define hipEventInterprocess cudaEventInterprocess
-#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
-#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
-
-
-#define hipHostMallocDefault cudaHostAllocDefault
-#define hipHostMallocPortable cudaHostAllocPortable
-#define hipHostMallocMapped cudaHostAllocMapped
-#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
-#define hipHostMallocCoherent 0x0
-#define hipHostMallocNonCoherent 0x0
-
-#define hipMemAttachGlobal cudaMemAttachGlobal
-#define hipMemAttachHost cudaMemAttachHost
-
-#define hipHostRegisterDefault cudaHostRegisterDefault
-#define hipHostRegisterPortable cudaHostRegisterPortable
-#define hipHostRegisterMapped cudaHostRegisterMapped
-#define hipHostRegisterIoMemory cudaHostRegisterIoMemory
-
-#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
-#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
-#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
-#define hipLimitMallocHeapSize cudaLimitMallocHeapSize
-#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
-
-#define hipOccupancyDefault cudaOccupancyDefault
-
-#define hipCooperativeLaunchMultiDeviceNoPreSync    \
-        cudaCooperativeLaunchMultiDeviceNoPreSync
-#define hipCooperativeLaunchMultiDeviceNoPostSync   \
-        cudaCooperativeLaunchMultiDeviceNoPostSync
-
-
-// enum CUjit_option redefines
-#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
-#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
-#define hipJitOptionWallTime CU_JIT_WALL_TIME
-#define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
-#define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
-#define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
-#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
-#define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
-#define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
-#define hipJitOptionTarget CU_JIT_TARGET
-#define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
-#define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
-#define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
-#define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
-#define hipJitOptionCacheMode CU_JIT_CACHE_MODE
-#define hipJitOptionSm3xOpt CU_JIT_NEW_SM3X_OPT
-#define hipJitOptionFastCompile CU_JIT_FAST_COMPILE
-#define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
-
-typedef cudaEvent_t hipEvent_t;
-typedef cudaStream_t hipStream_t;
-typedef cudaIpcEventHandle_t hipIpcEventHandle_t;
-typedef cudaIpcMemHandle_t hipIpcMemHandle_t;
-typedef enum cudaLimit hipLimit_t;
-typedef enum cudaFuncAttribute hipFuncAttribute;
-typedef enum cudaFuncCache hipFuncCache_t;
-typedef CUcontext hipCtx_t;
-typedef enum cudaSharedMemConfig hipSharedMemConfig;
-typedef CUfunc_cache hipFuncCache;
-typedef CUjit_option hipJitOption;
-typedef CUdevice hipDevice_t;
-typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
-#define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
-#define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported
-#define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported
-#define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported
-#define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize
-#define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout
-
-typedef CUmodule hipModule_t;
-typedef CUfunction hipFunction_t;
-typedef CUdeviceptr hipDeviceptr_t;
-typedef struct cudaArray hipArray;
-typedef struct cudaArray* hipArray_t;
-typedef struct cudaArray* hipArray_const_t;
-typedef struct cudaFuncAttributes hipFuncAttributes;
-typedef struct cudaLaunchParams hipLaunchParams;
-#define hipFunction_attribute CUfunction_attribute
-#define hip_Memcpy2D CUDA_MEMCPY2D
-#define hipMemcpy3DParms cudaMemcpy3DParms
-#define hipArrayDefault cudaArrayDefault
-#define hipArrayLayered cudaArrayLayered
-#define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore
-#define hipArrayCubemap cudaArrayCubemap
-#define hipArrayTextureGather cudaArrayTextureGather
-
-typedef cudaTextureObject_t hipTextureObject_t;
-typedef cudaSurfaceObject_t hipSurfaceObject_t;
-#define hipTextureType1D cudaTextureType1D
-#define hipTextureType1DLayered cudaTextureType1DLayered
-#define hipTextureType2D cudaTextureType2D
-#define hipTextureType2DLayered cudaTextureType2DLayered
-#define hipTextureType3D cudaTextureType3D
-#define hipDeviceMapHost cudaDeviceMapHost
-
-typedef struct cudaExtent hipExtent;
-typedef struct cudaPitchedPtr hipPitchedPtr;
-#define make_hipExtent make_cudaExtent
-#define make_hipPos make_cudaPos
-#define make_hipPitchedPtr make_cudaPitchedPtr
-// Flags that can be used with hipStreamCreateWithFlags
-#define hipStreamDefault cudaStreamDefault
-#define hipStreamNonBlocking cudaStreamNonBlocking
-
-typedef struct cudaChannelFormatDesc hipChannelFormatDesc;
-typedef struct cudaResourceDesc hipResourceDesc;
-typedef struct cudaTextureDesc hipTextureDesc;
-typedef struct cudaResourceViewDesc hipResourceViewDesc;
-// adding code for hipmemSharedConfig
-#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault
-#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte
-#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte
-
-//Function Attributes
-#define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
-#define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS
-#define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION
-#define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION
-#define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
-#define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
-#define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
-#define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX
-
-#if CUDA_VERSION >= 9000
-#define __shfl(...)      __shfl_sync(0xffffffff, __VA_ARGS__)
-#define __shfl_up(...)   __shfl_up_sync(0xffffffff, __VA_ARGS__)
-#define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__)
-#define __shfl_xor(...)  __shfl_xor_sync(0xffffffff, __VA_ARGS__)
-#endif // CUDA_VERSION >= 9000
-
-inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
-    switch (cuError) {
-        case cudaSuccess:
-            return hipSuccess;
-        case cudaErrorProfilerDisabled:
-            return hipErrorProfilerDisabled;
-        case cudaErrorProfilerNotInitialized:
-            return hipErrorProfilerNotInitialized;
-        case cudaErrorProfilerAlreadyStarted:
-            return hipErrorProfilerAlreadyStarted;
-        case cudaErrorProfilerAlreadyStopped:
-            return hipErrorProfilerAlreadyStopped;
-        case cudaErrorInsufficientDriver:
-            return hipErrorInsufficientDriver;
-        case cudaErrorUnsupportedLimit:
-            return hipErrorUnsupportedLimit;
-        case cudaErrorPeerAccessUnsupported:
-            return hipErrorPeerAccessUnsupported;
-        case cudaErrorInvalidGraphicsContext:
-            return hipErrorInvalidGraphicsContext;
-        case cudaErrorSharedObjectSymbolNotFound:
-            return hipErrorSharedObjectSymbolNotFound;
-        case cudaErrorSharedObjectInitFailed:
-            return hipErrorSharedObjectInitFailed;
-        case cudaErrorOperatingSystem:
-            return hipErrorOperatingSystem;
-        case cudaErrorSetOnActiveProcess:
-            return hipErrorSetOnActiveProcess;
-        case cudaErrorIllegalAddress:
-            return hipErrorIllegalAddress;
-        case cudaErrorInvalidSymbol:
-            return hipErrorInvalidSymbol;
-        case cudaErrorMissingConfiguration:
-            return hipErrorMissingConfiguration;
-        case cudaErrorMemoryAllocation:
-            return hipErrorOutOfMemory;
-        case cudaErrorInitializationError:
-            return hipErrorNotInitialized;
-        case cudaErrorLaunchFailure:
-            return hipErrorLaunchFailure;
-        case cudaErrorCooperativeLaunchTooLarge:
-            return hipErrorCooperativeLaunchTooLarge;
-        case cudaErrorPriorLaunchFailure:
-            return hipErrorPriorLaunchFailure;
-        case cudaErrorLaunchOutOfResources:
-            return hipErrorLaunchOutOfResources;
-        case cudaErrorInvalidDeviceFunction:
-            return hipErrorInvalidDeviceFunction;
-        case cudaErrorInvalidConfiguration:
-            return hipErrorInvalidConfiguration;
-        case cudaErrorInvalidDevice:
-            return hipErrorInvalidDevice;
-        case cudaErrorInvalidValue:
-            return hipErrorInvalidValue;
-        case cudaErrorInvalidDevicePointer:
-            return hipErrorInvalidDevicePointer;
-        case cudaErrorInvalidMemcpyDirection:
-            return hipErrorInvalidMemcpyDirection;
-        case cudaErrorInvalidResourceHandle:
-            return hipErrorInvalidHandle;
-        case cudaErrorNotReady:
-            return hipErrorNotReady;
-        case cudaErrorNoDevice:
-            return hipErrorNoDevice;
-        case cudaErrorPeerAccessAlreadyEnabled:
-            return hipErrorPeerAccessAlreadyEnabled;
-        case cudaErrorPeerAccessNotEnabled:
-            return hipErrorPeerAccessNotEnabled;
-        case cudaErrorHostMemoryAlreadyRegistered:
-            return hipErrorHostMemoryAlreadyRegistered;
-        case cudaErrorHostMemoryNotRegistered:
-            return hipErrorHostMemoryNotRegistered;
-        case cudaErrorMapBufferObjectFailed:
-            return hipErrorMapFailed;
-        case cudaErrorAssert:
-            return hipErrorAssert;
-        case cudaErrorNotSupported:
-            return hipErrorNotSupported;
-        case cudaErrorCudartUnloading:
-            return hipErrorDeinitialized;
-        case cudaErrorInvalidKernelImage:
-            return hipErrorInvalidImage;
-        case cudaErrorUnmapBufferObjectFailed:
-            return hipErrorUnmapFailed;
-        case cudaErrorNoKernelImageForDevice:
-            return hipErrorNoBinaryForGpu;
-        case cudaErrorECCUncorrectable:
-            return hipErrorECCNotCorrectable;
-        case cudaErrorDeviceAlreadyInUse:
-            return hipErrorContextAlreadyInUse;
-        case cudaErrorInvalidPtx:
-            return hipErrorInvalidKernelFile;
-        case cudaErrorLaunchTimeout:
-            return hipErrorLaunchTimeOut;
-#if CUDA_VERSION >= 10010
-        case cudaErrorInvalidSource:
-            return hipErrorInvalidSource;
-        case cudaErrorFileNotFound:
-            return hipErrorFileNotFound;
-        case cudaErrorSymbolNotFound:
-            return hipErrorNotFound;
-        case cudaErrorArrayIsMapped:
-            return hipErrorArrayIsMapped;
-        case cudaErrorNotMappedAsPointer:
-            return hipErrorNotMappedAsPointer;
-        case cudaErrorNotMappedAsArray:
-            return hipErrorNotMappedAsArray;
-        case cudaErrorNotMapped:
-            return hipErrorNotMapped;
-        case cudaErrorAlreadyAcquired:
-            return hipErrorAlreadyAcquired;
-        case cudaErrorAlreadyMapped:
-            return hipErrorAlreadyMapped;
-#endif
-#if CUDA_VERSION >= 10020
-        case cudaErrorDeviceUninitialized:
-            return hipErrorInvalidContext;
-#endif
-        case cudaErrorUnknown:
-        default:
-            return hipErrorUnknown;  // Note - translated error.
-    }
-}
-
-inline static hipError_t hipCUResultTohipError(CUresult cuError) {
-    switch (cuError) {
-        case CUDA_SUCCESS:
-            return hipSuccess;
-        case CUDA_ERROR_OUT_OF_MEMORY:
-            return hipErrorOutOfMemory;
-        case CUDA_ERROR_INVALID_VALUE:
-            return hipErrorInvalidValue;
-        case CUDA_ERROR_INVALID_DEVICE:
-            return hipErrorInvalidDevice;
-        case CUDA_ERROR_DEINITIALIZED:
-            return hipErrorDeinitialized;
-        case CUDA_ERROR_NO_DEVICE:
-            return hipErrorNoDevice;
-        case CUDA_ERROR_INVALID_CONTEXT:
-            return hipErrorInvalidContext;
-        case CUDA_ERROR_NOT_INITIALIZED:
-            return hipErrorNotInitialized;
-        case CUDA_ERROR_INVALID_HANDLE:
-            return hipErrorInvalidHandle;
-        case CUDA_ERROR_MAP_FAILED:
-            return hipErrorMapFailed;
-        case CUDA_ERROR_PROFILER_DISABLED:
-            return hipErrorProfilerDisabled;
-        case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
-            return hipErrorProfilerNotInitialized;
-        case CUDA_ERROR_PROFILER_ALREADY_STARTED:
-            return hipErrorProfilerAlreadyStarted;
-        case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
-            return hipErrorProfilerAlreadyStopped;
-        case CUDA_ERROR_INVALID_IMAGE:
-            return hipErrorInvalidImage;
-        case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
-            return hipErrorContextAlreadyCurrent;
-        case CUDA_ERROR_UNMAP_FAILED:
-            return hipErrorUnmapFailed;
-        case CUDA_ERROR_ARRAY_IS_MAPPED:
-            return hipErrorArrayIsMapped;
-        case CUDA_ERROR_ALREADY_MAPPED:
-            return hipErrorAlreadyMapped;
-        case CUDA_ERROR_NO_BINARY_FOR_GPU:
-            return hipErrorNoBinaryForGpu;
-        case CUDA_ERROR_ALREADY_ACQUIRED:
-            return hipErrorAlreadyAcquired;
-        case CUDA_ERROR_NOT_MAPPED:
-            return hipErrorNotMapped;
-        case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
-            return hipErrorNotMappedAsArray;
-        case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
-            return hipErrorNotMappedAsPointer;
-        case CUDA_ERROR_ECC_UNCORRECTABLE:
-            return hipErrorECCNotCorrectable;
-        case CUDA_ERROR_UNSUPPORTED_LIMIT:
-            return hipErrorUnsupportedLimit;
-        case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
-            return hipErrorContextAlreadyInUse;
-        case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
-            return hipErrorPeerAccessUnsupported;
-        case CUDA_ERROR_INVALID_PTX:
-            return hipErrorInvalidKernelFile;
-        case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
-            return hipErrorInvalidGraphicsContext;
-        case CUDA_ERROR_INVALID_SOURCE:
-            return hipErrorInvalidSource;
-        case CUDA_ERROR_FILE_NOT_FOUND:
-            return hipErrorFileNotFound;
-        case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
-            return hipErrorSharedObjectSymbolNotFound;
-        case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
-            return hipErrorSharedObjectInitFailed;
-        case CUDA_ERROR_OPERATING_SYSTEM:
-            return hipErrorOperatingSystem;
-        case CUDA_ERROR_NOT_FOUND:
-            return hipErrorNotFound;
-        case CUDA_ERROR_NOT_READY:
-            return hipErrorNotReady;
-        case CUDA_ERROR_ILLEGAL_ADDRESS:
-            return hipErrorIllegalAddress;
-        case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
-            return hipErrorLaunchOutOfResources;
-        case CUDA_ERROR_LAUNCH_TIMEOUT:
-            return hipErrorLaunchTimeOut;
-        case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
-            return hipErrorPeerAccessAlreadyEnabled;
-        case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
-            return hipErrorPeerAccessNotEnabled;
-        case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
-            return hipErrorSetOnActiveProcess;
-        case CUDA_ERROR_ASSERT:
-            return hipErrorAssert;
-        case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
-            return hipErrorHostMemoryAlreadyRegistered;
-        case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
-            return hipErrorHostMemoryNotRegistered;
-        case CUDA_ERROR_LAUNCH_FAILED:
-            return hipErrorLaunchFailure;
-        case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
-            return hipErrorCooperativeLaunchTooLarge;
-        case CUDA_ERROR_NOT_SUPPORTED:
-            return hipErrorNotSupported;
-        case CUDA_ERROR_UNKNOWN:
-        default:
-            return hipErrorUnknown;  // Note - translated error.
-    }
-}
-
-inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
-    switch (hError) {
-        case hipSuccess:
-            return cudaSuccess;
-        case hipErrorOutOfMemory:
-            return cudaErrorMemoryAllocation;
-        case hipErrorProfilerDisabled:
-          return cudaErrorProfilerDisabled;
-        case hipErrorProfilerNotInitialized:
-            return cudaErrorProfilerNotInitialized;
-        case hipErrorProfilerAlreadyStarted:
-            return cudaErrorProfilerAlreadyStarted;
-        case hipErrorProfilerAlreadyStopped:
-            return cudaErrorProfilerAlreadyStopped;
-        case hipErrorInvalidConfiguration:
-            return cudaErrorInvalidConfiguration;
-        case hipErrorLaunchOutOfResources:
-            return cudaErrorLaunchOutOfResources;
-        case hipErrorInvalidValue:
-            return cudaErrorInvalidValue;
-        case hipErrorInvalidHandle:
-            return cudaErrorInvalidResourceHandle;
-        case hipErrorInvalidDevice:
-            return cudaErrorInvalidDevice;
-        case hipErrorInvalidMemcpyDirection:
-            return cudaErrorInvalidMemcpyDirection;
-        case hipErrorInvalidDevicePointer:
-            return cudaErrorInvalidDevicePointer;
-        case hipErrorNotInitialized:
-            return cudaErrorInitializationError;
-        case hipErrorNoDevice:
-            return cudaErrorNoDevice;
-        case hipErrorNotReady:
-            return cudaErrorNotReady;
-        case hipErrorPeerAccessNotEnabled:
-            return cudaErrorPeerAccessNotEnabled;
-        case hipErrorPeerAccessAlreadyEnabled:
-            return cudaErrorPeerAccessAlreadyEnabled;
-        case hipErrorHostMemoryAlreadyRegistered:
-            return cudaErrorHostMemoryAlreadyRegistered;
-        case hipErrorHostMemoryNotRegistered:
-            return cudaErrorHostMemoryNotRegistered;
-        case hipErrorDeinitialized:
-            return cudaErrorCudartUnloading;
-        case hipErrorInvalidSymbol:
-            return cudaErrorInvalidSymbol;
-        case hipErrorInsufficientDriver:
-            return cudaErrorInsufficientDriver;
-        case hipErrorMissingConfiguration:
-            return cudaErrorMissingConfiguration;
-        case hipErrorPriorLaunchFailure:
-            return cudaErrorPriorLaunchFailure;
-        case hipErrorInvalidDeviceFunction:
-            return cudaErrorInvalidDeviceFunction;
-        case hipErrorInvalidImage:
-            return cudaErrorInvalidKernelImage;
-        case hipErrorInvalidContext:
-#if CUDA_VERSION >= 10020
-            return cudaErrorDeviceUninitialized;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorMapFailed:
-            return cudaErrorMapBufferObjectFailed;
-        case hipErrorUnmapFailed:
-            return cudaErrorUnmapBufferObjectFailed;
-        case hipErrorArrayIsMapped:
-#if CUDA_VERSION >= 10010
-            return cudaErrorArrayIsMapped;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorAlreadyMapped:
-#if CUDA_VERSION >= 10010
-            return cudaErrorAlreadyMapped;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorNoBinaryForGpu:
-            return cudaErrorNoKernelImageForDevice;
-        case hipErrorAlreadyAcquired:
-#if CUDA_VERSION >= 10010
-            return cudaErrorAlreadyAcquired;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorNotMapped:
-#if CUDA_VERSION >= 10010
-            return cudaErrorNotMapped;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorNotMappedAsArray:
-#if CUDA_VERSION >= 10010
-            return cudaErrorNotMappedAsArray;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorNotMappedAsPointer:
-#if CUDA_VERSION >= 10010
-            return cudaErrorNotMappedAsPointer;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorECCNotCorrectable:
-            return cudaErrorECCUncorrectable;
-        case hipErrorUnsupportedLimit:
-            return cudaErrorUnsupportedLimit;
-        case hipErrorContextAlreadyInUse:
-            return cudaErrorDeviceAlreadyInUse;
-        case hipErrorPeerAccessUnsupported:
-            return cudaErrorPeerAccessUnsupported;
-        case hipErrorInvalidKernelFile:
-            return cudaErrorInvalidPtx;
-        case hipErrorInvalidGraphicsContext:
-            return cudaErrorInvalidGraphicsContext;
-        case hipErrorInvalidSource:
-#if CUDA_VERSION >= 10010
-            return cudaErrorInvalidSource;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorFileNotFound:
-#if CUDA_VERSION >= 10010
-            return cudaErrorFileNotFound;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorSharedObjectSymbolNotFound:
-            return cudaErrorSharedObjectSymbolNotFound;
-        case hipErrorSharedObjectInitFailed:
-            return cudaErrorSharedObjectInitFailed;
-        case hipErrorOperatingSystem:
-            return cudaErrorOperatingSystem;
-        case hipErrorNotFound:
-#if CUDA_VERSION >= 10010
-            return cudaErrorSymbolNotFound;
-#else
-            return cudaErrorUnknown;
-#endif
-        case hipErrorIllegalAddress:
-            return cudaErrorIllegalAddress;
-        case hipErrorLaunchTimeOut:
-            return cudaErrorLaunchTimeout;
-        case hipErrorSetOnActiveProcess:
-            return cudaErrorSetOnActiveProcess;
-        case hipErrorLaunchFailure:
-            return cudaErrorLaunchFailure;
-        case hipErrorCooperativeLaunchTooLarge:
-            return cudaErrorCooperativeLaunchTooLarge;
-        case hipErrorNotSupported:
-            return cudaErrorNotSupported;
-        // HSA: does not exist in CUDA
-        case hipErrorRuntimeMemory:
-        // HSA: does not exist in CUDA
-        case hipErrorRuntimeOther:
-        case hipErrorUnknown:
-        case hipErrorTbd:
-        default:
-            return cudaErrorUnknown;  // Note - translated error.
-    }
-}
-
-inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
-    switch (kind) {
-        case hipMemcpyHostToHost:
-            return cudaMemcpyHostToHost;
-        case hipMemcpyHostToDevice:
-            return cudaMemcpyHostToDevice;
-        case hipMemcpyDeviceToHost:
-            return cudaMemcpyDeviceToHost;
-        case hipMemcpyDeviceToDevice:
-            return cudaMemcpyDeviceToDevice;
-        default:
-            return cudaMemcpyDefault;
-    }
-}
-
-inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
-    hipTextureAddressMode kind) {
-    switch (kind) {
-        case hipAddressModeWrap:
-            return cudaAddressModeWrap;
-        case hipAddressModeClamp:
-            return cudaAddressModeClamp;
-        case hipAddressModeMirror:
-            return cudaAddressModeMirror;
-        case hipAddressModeBorder:
-            return cudaAddressModeBorder;
-        default:
-            return cudaAddressModeWrap;
-    }
-}
-
-inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
-    hipTextureFilterMode kind) {
-    switch (kind) {
-        case hipFilterModePoint:
-            return cudaFilterModePoint;
-        case hipFilterModeLinear:
-            return cudaFilterModeLinear;
-        default:
-            return cudaFilterModePoint;
-    }
-}
-
-inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
-    switch (kind) {
-        case hipReadModeElementType:
-            return cudaReadModeElementType;
-        case hipReadModeNormalizedFloat:
-            return cudaReadModeNormalizedFloat;
-        default:
-            return cudaReadModeElementType;
-    }
-}
-
-inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
-    hipChannelFormatKind kind) {
-    switch (kind) {
-        case hipChannelFormatKindSigned:
-            return cudaChannelFormatKindSigned;
-        case hipChannelFormatKindUnsigned:
-            return cudaChannelFormatKindUnsigned;
-        case hipChannelFormatKindFloat:
-            return cudaChannelFormatKindFloat;
-        case hipChannelFormatKindNone:
-            return cudaChannelFormatKindNone;
-        default:
-            return cudaChannelFormatKindNone;
-    }
-}
-
-/**
- * Stream CallBack struct
- */
-#define HIPRT_CB CUDART_CB
-typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
-inline static hipError_t hipInit(unsigned int flags) {
-    return hipCUResultTohipError(cuInit(flags));
-}
-
-inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
-
-inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
-
-inline static hipError_t hipPeekAtLastError() {
-    return hipCUDAErrorTohipError(cudaPeekAtLastError());
-}
-
-inline static hipError_t hipMalloc(void** ptr, size_t size) {
-    return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
-}
-
-inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
-    return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
-}
-
-inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
-    return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
-}
-
-inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
-    return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
-}
-
-inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
-
-inline static hipError_t hipMallocHost(void** ptr, size_t size)
-    __attribute__((deprecated("use hipHostMalloc instead")));
-inline static hipError_t hipMallocHost(void** ptr, size_t size) {
-    return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
-}
-
-inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
-    __attribute__((deprecated("use hipHostMalloc instead")));
-inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
-    return hipCUResultTohipError(cuMemAllocHost(ptr, size));
-}
-
-inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
-    __attribute__((deprecated("use hipHostMalloc instead")));
-inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
-}
-
-inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags));
-}
-
-inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags));
-}
-
-inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
-                                        size_t width, size_t height,
-                                        unsigned int flags __dparm(hipArrayDefault)) {
-    return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags));
-}
-
-inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc,
-                             hipExtent extent, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags));
-}
-
-inline static hipError_t hipFreeArray(hipArray* array) {
-    return hipCUDAErrorTohipError(cudaFreeArray(array));
-}
-
-inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
-}
-
-inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
-    return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
-}
-
-inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags));
-}
-
-inline static hipError_t hipHostUnregister(void* ptr) {
-    return hipCUDAErrorTohipError(cudaHostUnregister(ptr));
-}
-
-inline static hipError_t hipFreeHost(void* ptr)
-    __attribute__((deprecated("use hipHostFree instead")));
-inline static hipError_t hipFreeHost(void* ptr) {
-    return hipCUDAErrorTohipError(cudaFreeHost(ptr));
-}
-
-inline static hipError_t hipHostFree(void* ptr) {
-    return hipCUDAErrorTohipError(cudaFreeHost(ptr));
-}
-
-inline static hipError_t hipSetDevice(int device) {
-    return hipCUDAErrorTohipError(cudaSetDevice(device));
-}
-
-inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
-    struct cudaDeviceProp cdprop;
-    memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp));
-    cdprop.major = prop->major;
-    cdprop.minor = prop->minor;
-    cdprop.totalGlobalMem = prop->totalGlobalMem;
-    cdprop.sharedMemPerBlock = prop->sharedMemPerBlock;
-    cdprop.regsPerBlock = prop->regsPerBlock;
-    cdprop.warpSize = prop->warpSize;
-    cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock;
-    cdprop.clockRate = prop->clockRate;
-    cdprop.totalConstMem = prop->totalConstMem;
-    cdprop.multiProcessorCount = prop->multiProcessorCount;
-    cdprop.l2CacheSize = prop->l2CacheSize;
-    cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor;
-    cdprop.computeMode = prop->computeMode;
-    cdprop.canMapHostMemory = prop->canMapHostMemory;
-    cdprop.memoryClockRate = prop->memoryClockRate;
-    cdprop.memoryBusWidth = prop->memoryBusWidth;
-    return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop));
-}
-
-inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) {
-    return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size));
-}
-
-inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) {
-    return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size));
-}
-
-inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) {
-    return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size));
-}
-
-inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size,
-                                            hipStream_t stream) {
-    return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream));
-}
-
-inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size,
-                                            hipStream_t stream) {
-    return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream));
-}
-
-inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size,
-                                            hipStream_t stream) {
-    return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream));
-}
-
-inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes,
-                                   hipMemcpyKind copyKind) {
-    return hipCUDAErrorTohipError(
-        cudaMemcpy(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind)));
-}
-
-
-inline static hipError_t hipMemcpyWithStream(void* dst, const void* src,
-				      size_t sizeBytes, hipMemcpyKind copyKind,
-				      hipStream_t stream) {
-	cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes, 
-										hipMemcpyKindToCudaMemcpyKind(copyKind),
-										stream);
-	
-	if (error != cudaSuccess) return hipCUDAErrorTohipError(error);
-	
-	return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
-}
-
-inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
-                                        hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) {
-    return hipCUDAErrorTohipError(
-        cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream));
-}
-
-inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes,
-                                           size_t offset __dparm(0),
-                                           hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) {
-    return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset,
-                                                     hipMemcpyKindToCudaMemcpyKind(copyType)));
-}
-
-inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src,
-                                                size_t sizeBytes, size_t offset,
-                                                hipMemcpyKind copyType,
-                                                hipStream_t stream __dparm(0)) {
-    return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(
-        symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream));
-}
-
-inline static hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes,
-                                             size_t offset __dparm(0),
-                                             hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
-    return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset,
-                                                       hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
-                                                  size_t sizeBytes, size_t offset,
-                                                  hipMemcpyKind kind,
-                                                  hipStream_t stream __dparm(0)) {
-    return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(
-        dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
-}
-
-inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
-    return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
-}
-
-inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
-    return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
-}
-
-inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
-                                     size_t width, size_t height, hipMemcpyKind kind) {
-    return hipCUDAErrorTohipError(
-        cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
-  return hipCUResultTohipError(cuMemcpy2D(pCopy));
-}
-
-inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) {
-  return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream));
-}
-
-inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
-{
-    return hipCUDAErrorTohipError(cudaMemcpy3D(p));
-}
-
-inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream)
-{
-    return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
-}
-
-inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
-                                          size_t width, size_t height, hipMemcpyKind kind,
-                                          hipStream_t stream) {
-    return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height,
-                                                    hipMemcpyKindToCudaMemcpyKind(kind), stream));
-}
-
-inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset,
-                                            const void* src, size_t spitch, size_t width,
-                                            size_t height, hipMemcpyKind kind) {
-    return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width,
-                                                      height, hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset,
-                                                           size_t hOffset, const void* src,
-                                                           size_t count, hipMemcpyKind kind) {
-    return hipCUDAErrorTohipError(
-        cudaMemcpyToArray(dst, wOffset, hOffset, src, count, hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray,
-                                                             size_t wOffset, size_t hOffset,
-                                                             size_t count, hipMemcpyKind kind) {
-    return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count,
-                                                      hipMemcpyKindToCudaMemcpyKind(kind)));
-}
-
-inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset,
-                                       size_t count) {
-    return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count));
-}
-
-inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost,
-                                       size_t count) {
-    return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count));
-}
-
-inline static hipError_t hipDeviceSynchronize() {
-    return hipCUDAErrorTohipError(cudaDeviceSynchronize());
-}
-
-inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) {
-    return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig));
-}
-
-inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
-    return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value));
-}
-
-inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
-    return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig));
-}
-
-inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
-    return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config));
-}
-
-inline static const char* hipGetErrorString(hipError_t error) {
-    return cudaGetErrorString(hipErrorToCudaError(error));
-}
-
-inline static const char* hipGetErrorName(hipError_t error) {
-    return cudaGetErrorName(hipErrorToCudaError(error));
-}
-
-inline static hipError_t hipGetDeviceCount(int* count) {
-    return hipCUDAErrorTohipError(cudaGetDeviceCount(count));
-}
-
-inline static hipError_t hipGetDevice(int* device) {
-    return hipCUDAErrorTohipError(cudaGetDevice(device));
-}
-
-inline static hipError_t hipIpcCloseMemHandle(void* devPtr) {
-    return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr));
-}
-
-inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) {
-    return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event));
-}
-
-inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
-    return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr));
-}
-
-inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) {
-    return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle));
-}
-
-inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle,
-                                             unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags));
-}
-
-inline static hipError_t hipMemset(void* devPtr, int value, size_t count) {
-    return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count));
-}
-
-inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) {
-    return hipCUResultTohipError(cuMemsetD32(devPtr, value, count));
-}
-
-inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count,
-                                        hipStream_t stream __dparm(0)) {
-    return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream));
-}
-
-inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count,
-                                           hipStream_t stream __dparm(0)) {
-    return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream));
-}
-
-inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) {
-    return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
-}
-
-inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
-                                          hipStream_t stream __dparm(0)) {
-    return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
-}
-
-inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
-    return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
-}
-
-inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
-                                           hipStream_t stream __dparm(0)) {
-    return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
-}
-
-inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
-    return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
-}
-
-inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) {
-    return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
-}
-
-inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent ){
-    return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
-}
-
-inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent, hipStream_t stream __dparm(0) ){
-    return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
-}
-
-inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
-    struct cudaDeviceProp cdprop;
-    cudaError_t cerror;
-    cerror = cudaGetDeviceProperties(&cdprop, device);
-
-    strncpy(p_prop->name, cdprop.name, 256);
-    p_prop->totalGlobalMem = cdprop.totalGlobalMem;
-    p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
-    p_prop->regsPerBlock = cdprop.regsPerBlock;
-    p_prop->warpSize = cdprop.warpSize;
-    p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
-    for (int i = 0; i < 3; i++) {
-        p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
-        p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
-    }
-    p_prop->clockRate = cdprop.clockRate;
-    p_prop->memoryClockRate = cdprop.memoryClockRate;
-    p_prop->memoryBusWidth = cdprop.memoryBusWidth;
-    p_prop->totalConstMem = cdprop.totalConstMem;
-    p_prop->major = cdprop.major;
-    p_prop->minor = cdprop.minor;
-    p_prop->multiProcessorCount = cdprop.multiProcessorCount;
-    p_prop->l2CacheSize = cdprop.l2CacheSize;
-    p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
-    p_prop->computeMode = cdprop.computeMode;
-    p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
-
-    int ccVers = p_prop->major * 100 + p_prop->minor * 10;
-    p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
-    p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
-    p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
-    p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
-    p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
-    p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
-    p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
-    p_prop->arch.hasDoubles = (ccVers >= 130);
-    p_prop->arch.hasWarpVote = (ccVers >= 120);
-    p_prop->arch.hasWarpBallot = (ccVers >= 200);
-    p_prop->arch.hasWarpShuffle = (ccVers >= 300);
-    p_prop->arch.hasFunnelShift = (ccVers >= 350);
-    p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
-    p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
-    p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
-    p_prop->arch.has3dGrid = (ccVers >= 200);
-    p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
-
-    p_prop->concurrentKernels = cdprop.concurrentKernels;
-    p_prop->pciDomainID = cdprop.pciDomainID;
-    p_prop->pciBusID = cdprop.pciBusID;
-    p_prop->pciDeviceID = cdprop.pciDeviceID;
-    p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
-    p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
-    p_prop->canMapHostMemory = cdprop.canMapHostMemory;
-    p_prop->gcnArch = 0; // Not a GCN arch
-    p_prop->integrated = cdprop.integrated;
-    p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
-    p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
-    p_prop->cooperativeMultiDeviceUnmatchedFunc = 0;
-    p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0;
-    p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0;
-    p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0;
-
-    p_prop->maxTexture1D    = cdprop.maxTexture1D;
-    p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
-    p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
-    p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
-    p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
-    p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
-
-    p_prop->memPitch                 = cdprop.memPitch;
-    p_prop->textureAlignment         = cdprop.textureAlignment;
-    p_prop->texturePitchAlignment    = cdprop.texturePitchAlignment;
-    p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
-    p_prop->ECCEnabled               = cdprop.ECCEnabled;
-    p_prop->tccDriver                = cdprop.tccDriver;
-
-    return hipCUDAErrorTohipError(cerror);
-}
-
-inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
-    enum cudaDeviceAttr cdattr;
-    cudaError_t cerror;
-
-    switch (attr) {
-        case hipDeviceAttributeMaxThreadsPerBlock:
-            cdattr = cudaDevAttrMaxThreadsPerBlock;
-            break;
-        case hipDeviceAttributeMaxBlockDimX:
-            cdattr = cudaDevAttrMaxBlockDimX;
-            break;
-        case hipDeviceAttributeMaxBlockDimY:
-            cdattr = cudaDevAttrMaxBlockDimY;
-            break;
-        case hipDeviceAttributeMaxBlockDimZ:
-            cdattr = cudaDevAttrMaxBlockDimZ;
-            break;
-        case hipDeviceAttributeMaxGridDimX:
-            cdattr = cudaDevAttrMaxGridDimX;
-            break;
-        case hipDeviceAttributeMaxGridDimY:
-            cdattr = cudaDevAttrMaxGridDimY;
-            break;
-        case hipDeviceAttributeMaxGridDimZ:
-            cdattr = cudaDevAttrMaxGridDimZ;
-            break;
-        case hipDeviceAttributeMaxSharedMemoryPerBlock:
-            cdattr = cudaDevAttrMaxSharedMemoryPerBlock;
-            break;
-        case hipDeviceAttributeTotalConstantMemory:
-            cdattr = cudaDevAttrTotalConstantMemory;
-            break;
-        case hipDeviceAttributeWarpSize:
-            cdattr = cudaDevAttrWarpSize;
-            break;
-        case hipDeviceAttributeMaxRegistersPerBlock:
-            cdattr = cudaDevAttrMaxRegistersPerBlock;
-            break;
-        case hipDeviceAttributeClockRate:
-            cdattr = cudaDevAttrClockRate;
-            break;
-        case hipDeviceAttributeMemoryClockRate:
-            cdattr = cudaDevAttrMemoryClockRate;
-            break;
-        case hipDeviceAttributeMemoryBusWidth:
-            cdattr = cudaDevAttrGlobalMemoryBusWidth;
-            break;
-        case hipDeviceAttributeMultiprocessorCount:
-            cdattr = cudaDevAttrMultiProcessorCount;
-            break;
-        case hipDeviceAttributeComputeMode:
-            cdattr = cudaDevAttrComputeMode;
-            break;
-        case hipDeviceAttributeL2CacheSize:
-            cdattr = cudaDevAttrL2CacheSize;
-            break;
-        case hipDeviceAttributeMaxThreadsPerMultiProcessor:
-            cdattr = cudaDevAttrMaxThreadsPerMultiProcessor;
-            break;
-        case hipDeviceAttributeComputeCapabilityMajor:
-            cdattr = cudaDevAttrComputeCapabilityMajor;
-            break;
-        case hipDeviceAttributeComputeCapabilityMinor:
-            cdattr = cudaDevAttrComputeCapabilityMinor;
-            break;
-        case hipDeviceAttributeConcurrentKernels:
-            cdattr = cudaDevAttrConcurrentKernels;
-            break;
-        case hipDeviceAttributePciBusId:
-            cdattr = cudaDevAttrPciBusId;
-            break;
-        case hipDeviceAttributePciDeviceId:
-            cdattr = cudaDevAttrPciDeviceId;
-            break;
-        case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
-            cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor;
-            break;
-        case hipDeviceAttributeIsMultiGpuBoard:
-            cdattr = cudaDevAttrIsMultiGpuBoard;
-            break;
-        case hipDeviceAttributeIntegrated:
-            cdattr = cudaDevAttrIntegrated;
-            break;
-        case hipDeviceAttributeMaxTexture1DWidth:
-            cdattr = cudaDevAttrMaxTexture1DWidth;
-            break;
-        case hipDeviceAttributeMaxTexture2DWidth:
-            cdattr = cudaDevAttrMaxTexture2DWidth;
-            break;
-        case hipDeviceAttributeMaxTexture2DHeight:
-            cdattr = cudaDevAttrMaxTexture2DHeight;
-            break;
-        case hipDeviceAttributeMaxTexture3DWidth:
-            cdattr = cudaDevAttrMaxTexture3DWidth;
-            break;
-        case hipDeviceAttributeMaxTexture3DHeight:
-            cdattr = cudaDevAttrMaxTexture3DHeight;
-            break;
-        case hipDeviceAttributeMaxTexture3DDepth:
-            cdattr = cudaDevAttrMaxTexture3DDepth;
-            break;
-        case hipDeviceAttributeMaxPitch:
-            cdattr = cudaDevAttrMaxPitch;
-            break;
-        case hipDeviceAttributeTextureAlignment:
-            cdattr = cudaDevAttrTextureAlignment;
-            break;
-        case hipDeviceAttributeTexturePitchAlignment:
-            cdattr = cudaDevAttrTexturePitchAlignment;
-            break;
-        case hipDeviceAttributeKernelExecTimeout:
-            cdattr = cudaDevAttrKernelExecTimeout;
-            break;
-        case hipDeviceAttributeCanMapHostMemory:
-            cdattr = cudaDevAttrCanMapHostMemory;
-            break;
-        case hipDeviceAttributeEccEnabled:
-            cdattr = cudaDevAttrEccEnabled;
-            break;
-        case hipDeviceAttributeCooperativeLaunch:
-            cdattr = cudaDevAttrCooperativeLaunch;
-            break;
-        case hipDeviceAttributeCooperativeMultiDeviceLaunch:
-            cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
-            break;
-        default:
-            return hipCUDAErrorTohipError(cudaErrorInvalidValue);
-    }
-
-    cerror = cudaDeviceGetAttribute(pi, cdattr, device);
-
-    return hipCUDAErrorTohipError(cerror);
-}
-
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
-                                                                      const void* func,
-                                                                      int blockSize,
-                                                                      size_t dynamicSMemSize) {
-    return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
-                                                              blockSize, dynamicSMemSize));
-}
-
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
-                                                                      const void* func,
-                                                                      int blockSize,
-                                                                      size_t dynamicSMemSize,
-                                                                      unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
-                                                      blockSize, dynamicSMemSize, flags));
-}
-
-inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, 
-                                                                 hipFunction_t f,
-                                                                 int  blockSize,
-                                                                 size_t dynamicSMemSize ){
-    return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f,
-                                                                   blockSize, dynamicSMemSize));
-}
-
-inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
-                                                                          hipFunction_t f,
-                                                                          int  blockSize,
-                                                                          size_t dynamicSMemSize,
-                                                                          unsigned int  flags ) {
-    return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f,
-                                                                blockSize, dynamicSMemSize, flags));
-}
-
-//TODO - Match CUoccupancyB2DSize
-inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
-                                             hipFunction_t f, size_t dynSharedMemPerBlk,
-                                             int blockSizeLimit){
-    return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL,
-                                 dynSharedMemPerBlk, blockSizeLimit));
-}
-
-//TODO - Match CUoccupancyB2DSize
-inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
-                                             hipFunction_t f, size_t dynSharedMemPerBlk,
-                                             int blockSizeLimit, unsigned int  flags){
-    return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL,
-                                 dynSharedMemPerBlk, blockSizeLimit, flags));
-}
-
-inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
-    struct cudaPointerAttributes cPA;
-    hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
-    if (err == hipSuccess) {
-#if (CUDART_VERSION >= 11000)
-        auto memType = cPA.type;
-#else
-        unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11
-#endif
-        switch (memType) {
-            case cudaMemoryTypeDevice:
-                attributes->memoryType = hipMemoryTypeDevice;
-                break;
-            case cudaMemoryTypeHost:
-                attributes->memoryType = hipMemoryTypeHost;
-                break;
-            default:
-                return hipErrorUnknown;
-        }
-        attributes->device = cPA.device;
-        attributes->devicePointer = cPA.devicePointer;
-        attributes->hostPointer = cPA.hostPointer;
-        attributes->isManaged = 0;
-        attributes->allocationFlags = 0;
-    }
-    return err;
-}
-
-inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) {
-    return hipCUDAErrorTohipError(cudaMemGetInfo(free, total));
-}
-
-inline static hipError_t hipEventCreate(hipEvent_t* event) {
-    return hipCUDAErrorTohipError(cudaEventCreate(event));
-}
-
-inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) {
-    return hipCUDAErrorTohipError(cudaEventRecord(event, stream));
-}
-
-inline static hipError_t hipEventSynchronize(hipEvent_t event) {
-    return hipCUDAErrorTohipError(cudaEventSynchronize(event));
-}
-
-inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
-    return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop));
-}
-
-inline static hipError_t hipEventDestroy(hipEvent_t event) {
-    return hipCUDAErrorTohipError(cudaEventDestroy(event));
-}
-
-inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags));
-}
-
-inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
-    return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority));
-}
-
-inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
-    return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority));
-}
-
-inline static hipError_t hipStreamCreate(hipStream_t* stream) {
-    return hipCUDAErrorTohipError(cudaStreamCreate(stream));
-}
-
-inline static hipError_t hipStreamSynchronize(hipStream_t stream) {
-    return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
-}
-
-inline static hipError_t hipStreamDestroy(hipStream_t stream) {
-    return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
-}
-
-inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
-    return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags));
-}
-
-inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) {
-    return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority));
-}
-
-inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event,
-                                            unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags));
-}
-
-inline static hipError_t hipStreamQuery(hipStream_t stream) {
-    return hipCUDAErrorTohipError(cudaStreamQuery(stream));
-}
-
-inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback,
-                                              void* userData, unsigned int flags) {
-    return hipCUDAErrorTohipError(
-        cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags));
-}
-
-inline static hipError_t hipDriverGetVersion(int* driverVersion) {
-    cudaError_t err = cudaDriverGetVersion(driverVersion);
-
-    // Override driver version to match version reported on HCC side.
-    *driverVersion = 4;
-
-    return hipCUDAErrorTohipError(err);
-}
-
-inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
-    return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion));
-}
-
-inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) {
-    return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice));
-}
-
-inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) {
-    return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice));
-}
-
-inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags));
-}
-
-inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
-    return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx));
-}
-
-inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
-    return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags));
-}
-
-inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags,
-                                                     int* active) {
-    return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active));
-}
-
-inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
-    return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev));
-}
-
-inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
-    return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev));
-}
-
-inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
-    return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev));
-}
-
-inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
-    return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags));
-}
-
-inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize,
-                                               hipDeviceptr_t dptr) {
-    return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr));
-}
-
-inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
-                                       size_t count) {
-    return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count));
-}
-
-inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
-                                            int srcDevice, size_t count,
-                                            hipStream_t stream __dparm(0)) {
-    return hipCUDAErrorTohipError(
-        cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream));
-}
-
-// Profile APIs:
-inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); }
-
-inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); }
-
-inline static hipError_t hipGetDeviceFlags(unsigned int* flags) {
-    return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags));
-}
-
-inline static hipError_t hipSetDeviceFlags(unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags));
-}
-
-inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags));
-}
-
-inline static hipError_t hipEventQuery(hipEvent_t event) {
-    return hipCUDAErrorTohipError(cudaEventQuery(event));
-}
-
-inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
-    return hipCUResultTohipError(cuCtxCreate(ctx, flags, device));
-}
-
-inline static hipError_t hipCtxDestroy(hipCtx_t ctx) {
-    return hipCUResultTohipError(cuCtxDestroy(ctx));
-}
-
-inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
-    return hipCUResultTohipError(cuCtxPopCurrent(ctx));
-}
-
-inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
-    return hipCUResultTohipError(cuCtxPushCurrent(ctx));
-}
-
-inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
-    return hipCUResultTohipError(cuCtxSetCurrent(ctx));
-}
-
-inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
-    return hipCUResultTohipError(cuCtxGetCurrent(ctx));
-}
-
-inline static hipError_t hipCtxGetDevice(hipDevice_t* device) {
-    return hipCUResultTohipError(cuCtxGetDevice(device));
-}
-
-inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
-    return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion));
-}
-
-inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) {
-    return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig));
-}
-
-inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) {
-    return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig));
-}
-
-inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
-    return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config));
-}
-
-inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
-    return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig));
-}
-
-inline static hipError_t hipCtxSynchronize(void) {
-    return hipCUResultTohipError(cuCtxSynchronize());
-}
-
-inline static hipError_t hipCtxGetFlags(unsigned int* flags) {
-    return hipCUResultTohipError(cuCtxGetFlags(flags));
-}
-
-inline static hipError_t hipCtxDetach(hipCtx_t ctx) {
-    return hipCUResultTohipError(cuCtxDetach(ctx));
-}
-
-inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) {
-    return hipCUResultTohipError(cuDeviceGet(device, ordinal));
-}
-
-inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
-    return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device));
-}
-
-inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
-    return hipCUResultTohipError(cuDeviceGetName(name, len, device));
-}
-
-inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
-                                                  int srcDevice, int dstDevice) {
-    return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice));
-}
-
-inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) {
-    return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device));
-}
-
-inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
-    return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId));
-}
-
-inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) {
-    return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config));
-}
-
-inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
-    return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config));
-}
-
-inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
-    return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit));
-}
-
-inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
-    return hipCUResultTohipError(cuDeviceTotalMem(bytes, device));
-}
-
-inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
-    return hipCUResultTohipError(cuModuleLoad(module, fname));
-}
-
-inline static hipError_t hipModuleUnload(hipModule_t hmod) {
-    return hipCUResultTohipError(cuModuleUnload(hmod));
-}
-
-inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module,
-                                              const char* kname) {
-    return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
-}
-
-inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){
-    hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name));
-}
-
-inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
-    return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
-}
-
-inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) {
-    return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc));
-}
-
-inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
-                                            const char* name) {
-    return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
-}
-
-inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
-    return hipCUResultTohipError(cuModuleLoadData(module, image));
-}
-
-inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image,
-                                             unsigned int numOptions, hipJitOption* options,
-                                             void** optionValues) {
-    return hipCUResultTohipError(
-        cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
-}
-
-inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
-					 dim3 dimBlocks, void** args, size_t sharedMemBytes,
-					 hipStream_t stream)
-{
-   return hipCUDAErrorTohipError(cudaLaunchKernel(function_address,numBlocks,dimBlocks,args,sharedMemBytes,stream));
-}
-
-inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
-                                               unsigned int gridDimY, unsigned int gridDimZ,
-                                               unsigned int blockDimX, unsigned int blockDimY,
-                                               unsigned int blockDimZ, unsigned int sharedMemBytes,
-                                               hipStream_t stream, void** kernelParams,
-                                               void** extra) {
-    return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX,
-                                                blockDimY, blockDimZ, sharedMemBytes, stream,
-                                                kernelParams, extra));
-}
-
-inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
-    return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset,
-                                                         struct textureReference* tex,
-                                                         const void* devPtr,
-                                                         const hipChannelFormatDesc* desc,
-                                                         size_t size __dparm(UINT_MAX)) {
-    return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipBindTexture2D(
-    size_t* offset, struct textureReference* tex, const void* devPtr,
-    const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) {
-    return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch));
-}
-
-inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
-                                                        hipChannelFormatKind f) {
-    return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f));
-}
-
-inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject,
-                                                const hipResourceDesc* pResDesc,
-                                                const hipTextureDesc* pTexDesc,
-                                                const hipResourceViewDesc* pResViewDesc) {
-    return hipCUDAErrorTohipError(
-        cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc));
-}
-
-inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
-    return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject));
-}
-
-inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
-                                                const hipResourceDesc* pResDesc) {
-    return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc));
-}
-
-inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
-    return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject));
-}
-
-inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
-                                           hipTextureObject_t textureObject) {
-    return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
-}
-
-__HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset(
-    size_t* offset, const struct textureReference* texref) {
-    return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
-}
-
-inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
-{
-    return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
-}
-
-inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
-                                      void** kernelParams, unsigned int sharedMemBytes,
-                                      hipStream_t stream) {
-    return hipCUDAErrorTohipError(
-            cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream));
-}
-
-inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
-                                                 int  numDevices, unsigned int  flags) {
-    return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags));
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#ifdef __CUDACC__
-
-template<class T>
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
-                                                                      T func,
-                                                                      int blockSize,
-                                                                      size_t dynamicSMemSize) {
-    return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func,
-                                                            blockSize, dynamicSMemSize));
-}
-
-template <class T>
-inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func,
-                                                           size_t dynamicSMemSize = 0,
-                                                           int blockSizeLimit = 0) {
-    return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
-                                                           dynamicSMemSize, blockSizeLimit));
-}
-
-template <class T>
-inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func,
-                                                           size_t dynamicSMemSize = 0,
-                                                           int blockSizeLimit = 0, unsigned int  flags = 0) {
-    return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func,
-                                                           dynamicSMemSize, blockSizeLimit, flags));
-}
-
-template <class T>
-inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func,
-                                              int  blockSize, size_t dynamicSMemSize,unsigned int flags) {
-    return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
-                                                                 blockSize, dynamicSMemSize, flags));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
-                                        const void* devPtr, size_t size = UINT_MAX) {
-    return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
-                                        const void* devPtr, const hipChannelFormatDesc& desc,
-                                        size_t size = UINT_MAX) {
-    return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
-    return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
-    return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
-    struct texture<T, dim, readMode>& tex, hipArray_const_t array,
-    const hipChannelFormatDesc& desc) {
-    return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
-    struct texture<T, dim, readMode>* tex, hipArray_const_t array,
-    const hipChannelFormatDesc* desc) {
-    return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
-}
-
-template <class T, int dim, enum cudaTextureReadMode readMode>
-__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray(
-    struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
-    return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
-}
-
-template <class T>
-inline static hipChannelFormatDesc hipCreateChannelDesc() {
-    return cudaCreateChannelDesc<T>();
-}
-
-template <class T>
-inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim,
-                                             void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) {
-    return hipCUDAErrorTohipError(
-            cudaLaunchCooperativeKernel(reinterpret_cast<const void*>(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream));
-}
-
-inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){
-    return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am));
-}
-
-inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){
-    return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm));
-}
-
-inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){
-   return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes));
-}
-
-inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t Pitch){
-   return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch));
-}
-
-inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){
-   return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents));
-}
-
-inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){
-   return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags));
-}
-
-inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){
-   return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags));
-}
-
-inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){
-   return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray));
-}
-
-inline static hipError_t hipArrayDestroy(hiparray hArray){
-   return hipCUResultTohipError(cuArrayDestroy(hArray));
-}
-
-#endif  //__CUDACC__
-
-#endif  // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
diff --git a/third_party/rocm/include/hip/nvcc_detail/hip_texture_types.h b/third_party/rocm/include/hip/nvcc_detail/hip_texture_types.h
deleted file mode 100644
index 751dd8e..0000000
--- a/third_party/rocm/include/hip/nvcc_detail/hip_texture_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H
-
-#include <texture_types.h>
-
-#endif
diff --git a/third_party/rocm/include/hip/texture_types.h b/third_party/rocm/include/hip/texture_types.h
deleted file mode 100644
index 7d78570..0000000
--- a/third_party/rocm/include/hip/texture_types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H
-#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H
-
-#include <hip/hip_common.h>
-
-#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
-#include <hip/hcc_detail/texture_types.h>
-#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
-#include "texture_types.h"
-#else
-#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
-#endif
-
-#endif
diff --git a/third_party/rocm/include/hsa/Brig.h b/third_party/rocm/include/hsa/Brig.h
deleted file mode 100644
index 4f34bd1..0000000
--- a/third_party/rocm/include/hsa/Brig.h
+++ /dev/null
@@ -1,1131 +0,0 @@
-// University of Illinois/NCSA
-// Open Source License
-//
-// Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
-// All rights reserved.
-//
-// Developed by:
-//
-//     HSA Team
-//
-//     Advanced Micro Devices, Inc
-//
-//     www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal with
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-//
-//     * Redistributions of source code must retain the above copyright notice,
-//       this list of conditions and the following disclaimers.
-//
-//     * Redistributions in binary form must reproduce the above copyright notice,
-//       this list of conditions and the following disclaimers in the
-//       documentation and/or other materials provided with the distribution.
-//
-//     * Neither the names of the LLVM Team, University of Illinois at
-//       Urbana-Champaign, nor the names of its contributors may be used to
-//       endorse or promote products derived from this Software without specific
-//       prior written permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-// SOFTWARE.
-
-#ifndef INCLUDED_BRIG_H
-#define INCLUDED_BRIG_H
-
-#include <stddef.h>   /* size_t */
-#include <stdint.h>   /* uintXX_t */
-
-#ifdef __cplusplus
-extern "C" {
-#endif  /* __cplusplus */
-
-/*========================================================================================*/
-/* =======================================================================================*/
-/* =======================================================================================*/
-/* =======================================================================================*/
-
-typedef uint32_t BrigCodeOffset32_t;
-typedef uint32_t BrigOperandOffset32_t;
-typedef uint32_t BrigDataOffset32_t;
-
-typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
-typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
-typedef BrigDataOffset32_t BrigDataOffsetString32_t;
-
-typedef uint32_t BrigVersion32_t;
-enum BrigVersion {
-    BRIG_VERSION_HSAIL_MAJOR = 1,
-    BRIG_VERSION_HSAIL_MINOR = 0,
-    BRIG_VERSION_BRIG_MAJOR  = 1,
-    BRIG_VERSION_BRIG_MINOR  = 0
-};
-
-typedef uint16_t BrigKind16_t;
-enum BrigKind {
-    BRIG_KIND_NONE = 0x0000,
-
-    BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
-        BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
-        BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
-        BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
-        BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
-        BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
-        BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
-        BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
-        BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
-        BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
-        BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
-        BRIG_KIND_DIRECTIVE_LOC = 0x100a,
-        BRIG_KIND_DIRECTIVE_MODULE = 0x100b,
-        BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
-        BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,
-        BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
-    BRIG_KIND_DIRECTIVE_END = 0x100f,
-
-    BRIG_KIND_INST_BEGIN = 0x2000,
-        BRIG_KIND_INST_ADDR = 0x2000,
-        BRIG_KIND_INST_ATOMIC = 0x2001,
-        BRIG_KIND_INST_BASIC = 0x2002,
-        BRIG_KIND_INST_BR = 0x2003,
-        BRIG_KIND_INST_CMP = 0x2004,
-        BRIG_KIND_INST_CVT = 0x2005,
-        BRIG_KIND_INST_IMAGE = 0x2006,
-        BRIG_KIND_INST_LANE = 0x2007,
-        BRIG_KIND_INST_MEM = 0x2008,
-        BRIG_KIND_INST_MEM_FENCE = 0x2009,
-        BRIG_KIND_INST_MOD = 0x200a,
-        BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
-        BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
-        BRIG_KIND_INST_QUEUE = 0x200d,
-        BRIG_KIND_INST_SEG = 0x200e,
-        BRIG_KIND_INST_SEG_CVT = 0x200f,
-        BRIG_KIND_INST_SIGNAL = 0x2010,
-        BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
-    BRIG_KIND_INST_END = 0x2012,
-
-    BRIG_KIND_OPERAND_BEGIN = 0x3000,
-        BRIG_KIND_OPERAND_ADDRESS = 0x3000,
-        BRIG_KIND_OPERAND_ALIGN = 0x3001,
-        BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
-        BRIG_KIND_OPERAND_CODE_REF = 0x3003,
-        BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
-        BRIG_KIND_OPERAND_RESERVED = 0x3005,
-        BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
-        BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
-        BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
-        BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
-        BRIG_KIND_OPERAND_REGISTER = 0x300a,
-        BRIG_KIND_OPERAND_STRING = 0x300b,
-        BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
-    BRIG_KIND_OPERAND_END = 0x300d
-};
-
-typedef uint8_t BrigAlignment8_t;
-enum BrigAlignment {
-    BRIG_ALIGNMENT_NONE = 0,
-    BRIG_ALIGNMENT_1 = 1,
-    BRIG_ALIGNMENT_2 = 2,
-    BRIG_ALIGNMENT_4 = 3,
-    BRIG_ALIGNMENT_8 = 4,
-    BRIG_ALIGNMENT_16 = 5,
-    BRIG_ALIGNMENT_32 = 6,
-    BRIG_ALIGNMENT_64 = 7,
-    BRIG_ALIGNMENT_128 = 8,
-    BRIG_ALIGNMENT_256 = 9,
-    BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_256
-};
-
-typedef uint8_t BrigAllocation8_t;
-enum BrigAllocation {
-    BRIG_ALLOCATION_NONE = 0,
-    BRIG_ALLOCATION_PROGRAM = 1,
-    BRIG_ALLOCATION_AGENT = 2,
-    BRIG_ALLOCATION_AUTOMATIC = 3
-};
-
-typedef uint8_t BrigAluModifier8_t;
-enum BrigAluModifierMask {
-    BRIG_ALU_FTZ = 1
-};
-
-typedef uint8_t BrigAtomicOperation8_t;
-enum BrigAtomicOperation {
-    BRIG_ATOMIC_ADD = 0,
-    BRIG_ATOMIC_AND = 1,
-    BRIG_ATOMIC_CAS = 2,
-    BRIG_ATOMIC_EXCH = 3,
-    BRIG_ATOMIC_LD = 4,
-    BRIG_ATOMIC_MAX = 5,
-    BRIG_ATOMIC_MIN = 6,
-    BRIG_ATOMIC_OR = 7,
-    BRIG_ATOMIC_ST = 8,
-    BRIG_ATOMIC_SUB = 9,
-    BRIG_ATOMIC_WRAPDEC = 10,
-    BRIG_ATOMIC_WRAPINC = 11,
-    BRIG_ATOMIC_XOR = 12,
-    BRIG_ATOMIC_WAIT_EQ = 13,
-    BRIG_ATOMIC_WAIT_NE = 14,
-    BRIG_ATOMIC_WAIT_LT = 15,
-    BRIG_ATOMIC_WAIT_GTE = 16,
-    BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
-    BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
-    BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
-    BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
-};
-
-typedef uint8_t BrigCompareOperation8_t;
-enum BrigCompareOperation {
-    BRIG_COMPARE_EQ = 0,
-    BRIG_COMPARE_NE = 1,
-    BRIG_COMPARE_LT = 2,
-    BRIG_COMPARE_LE = 3,
-    BRIG_COMPARE_GT = 4,
-    BRIG_COMPARE_GE = 5,
-    BRIG_COMPARE_EQU = 6,
-    BRIG_COMPARE_NEU = 7,
-    BRIG_COMPARE_LTU = 8,
-    BRIG_COMPARE_LEU = 9,
-    BRIG_COMPARE_GTU = 10,
-    BRIG_COMPARE_GEU = 11,
-    BRIG_COMPARE_NUM = 12,
-    BRIG_COMPARE_NAN = 13,
-    BRIG_COMPARE_SEQ = 14,
-    BRIG_COMPARE_SNE = 15,
-    BRIG_COMPARE_SLT = 16,
-    BRIG_COMPARE_SLE = 17,
-    BRIG_COMPARE_SGT = 18,
-    BRIG_COMPARE_SGE = 19,
-    BRIG_COMPARE_SGEU = 20,
-    BRIG_COMPARE_SEQU = 21,
-    BRIG_COMPARE_SNEU = 22,
-    BRIG_COMPARE_SLTU = 23,
-    BRIG_COMPARE_SLEU = 24,
-    BRIG_COMPARE_SNUM = 25,
-    BRIG_COMPARE_SNAN = 26,
-    BRIG_COMPARE_SGTU = 27
-};
-
-typedef uint16_t BrigControlDirective16_t;
-enum BrigControlDirective {
-    BRIG_CONTROL_NONE = 0,
-    BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
-    BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
-    BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
-    BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
-    BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
-    BRIG_CONTROL_REQUIREDDIM = 6,
-    BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
-    BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
-    BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
-};
-
-typedef uint8_t BrigExecutableModifier8_t;
-enum BrigExecutableModifierMask {
-    BRIG_EXECUTABLE_DEFINITION = 1
-};
-
-typedef uint8_t BrigImageChannelOrder8_t;
-enum BrigImageChannelOrder {
-    BRIG_CHANNEL_ORDER_A = 0,
-    BRIG_CHANNEL_ORDER_R = 1,
-    BRIG_CHANNEL_ORDER_RX = 2,
-    BRIG_CHANNEL_ORDER_RG = 3,
-    BRIG_CHANNEL_ORDER_RGX = 4,
-    BRIG_CHANNEL_ORDER_RA = 5,
-    BRIG_CHANNEL_ORDER_RGB = 6,
-    BRIG_CHANNEL_ORDER_RGBX = 7,
-    BRIG_CHANNEL_ORDER_RGBA = 8,
-    BRIG_CHANNEL_ORDER_BGRA = 9,
-    BRIG_CHANNEL_ORDER_ARGB = 10,
-    BRIG_CHANNEL_ORDER_ABGR = 11,
-    BRIG_CHANNEL_ORDER_SRGB = 12,
-    BRIG_CHANNEL_ORDER_SRGBX = 13,
-    BRIG_CHANNEL_ORDER_SRGBA = 14,
-    BRIG_CHANNEL_ORDER_SBGRA = 15,
-    BRIG_CHANNEL_ORDER_INTENSITY = 16,
-    BRIG_CHANNEL_ORDER_LUMINANCE = 17,
-    BRIG_CHANNEL_ORDER_DEPTH = 18,
-    BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
-
-    BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageChannelType8_t;
-enum BrigImageChannelType {
-    BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
-    BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
-    BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
-    BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
-    BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
-    BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
-    BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
-    BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
-    BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
-    BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
-    BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
-    BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
-    BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
-    BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
-    BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
-    BRIG_CHANNEL_TYPE_FLOAT = 15,
-
-    BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageGeometry8_t;
-enum BrigImageGeometry {
-    BRIG_GEOMETRY_1D = 0,
-    BRIG_GEOMETRY_2D = 1,
-    BRIG_GEOMETRY_3D = 2,
-    BRIG_GEOMETRY_1DA = 3,
-    BRIG_GEOMETRY_2DA = 4,
-    BRIG_GEOMETRY_1DB = 5,
-    BRIG_GEOMETRY_2DDEPTH = 6,
-    BRIG_GEOMETRY_2DADEPTH = 7,
-
-    BRIG_GEOMETRY_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageQuery8_t;
-enum BrigImageQuery {
-    BRIG_IMAGE_QUERY_WIDTH = 0,
-    BRIG_IMAGE_QUERY_HEIGHT = 1,
-    BRIG_IMAGE_QUERY_DEPTH = 2,
-    BRIG_IMAGE_QUERY_ARRAY = 3,
-    BRIG_IMAGE_QUERY_CHANNELORDER = 4,
-    BRIG_IMAGE_QUERY_CHANNELTYPE = 5,
-
-    BRIG_IMAGE_QUERY_FIRST_USER_DEFINED = 6
-};
-
-typedef uint8_t BrigLinkage8_t;
-enum BrigLinkage {
-    BRIG_LINKAGE_NONE = 0,
-    BRIG_LINKAGE_PROGRAM = 1,
-    BRIG_LINKAGE_MODULE = 2,
-    BRIG_LINKAGE_FUNCTION = 3,
-    BRIG_LINKAGE_ARG = 4
-};
-
-typedef uint8_t BrigMachineModel8_t;
-enum BrigMachineModel {
-    BRIG_MACHINE_SMALL = 0,
-    BRIG_MACHINE_LARGE = 1,
-};
-
-typedef uint8_t BrigMemoryModifier8_t;
-enum BrigMemoryModifierMask {
-    BRIG_MEMORY_CONST = 1
-};
-
-typedef uint8_t BrigMemoryOrder8_t;
-enum BrigMemoryOrder {
-    BRIG_MEMORY_ORDER_NONE = 0,
-    BRIG_MEMORY_ORDER_RELAXED = 1,
-    BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
-    BRIG_MEMORY_ORDER_SC_RELEASE = 3,
-    BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4,
-};
-
-typedef uint8_t BrigMemoryScope8_t;
-enum BrigMemoryScope {
-    BRIG_MEMORY_SCOPE_NONE = 0,
-    BRIG_MEMORY_SCOPE_WORKITEM = 1,
-    BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
-    BRIG_MEMORY_SCOPE_WORKGROUP = 3,
-    BRIG_MEMORY_SCOPE_AGENT = 4,
-    BRIG_MEMORY_SCOPE_SYSTEM = 5,
-};
-
-typedef uint16_t BrigOpcode16_t;
-enum BrigOpcode {
-    BRIG_OPCODE_NOP = 0,
-    BRIG_OPCODE_ABS = 1,
-    BRIG_OPCODE_ADD = 2,
-    BRIG_OPCODE_BORROW = 3,
-    BRIG_OPCODE_CARRY = 4,
-    BRIG_OPCODE_CEIL = 5,
-    BRIG_OPCODE_COPYSIGN = 6,
-    BRIG_OPCODE_DIV = 7,
-    BRIG_OPCODE_FLOOR = 8,
-    BRIG_OPCODE_FMA = 9,
-    BRIG_OPCODE_FRACT = 10,
-    BRIG_OPCODE_MAD = 11,
-    BRIG_OPCODE_MAX = 12,
-    BRIG_OPCODE_MIN = 13,
-    BRIG_OPCODE_MUL = 14,
-    BRIG_OPCODE_MULHI = 15,
-    BRIG_OPCODE_NEG = 16,
-    BRIG_OPCODE_REM = 17,
-    BRIG_OPCODE_RINT = 18,
-    BRIG_OPCODE_SQRT = 19,
-    BRIG_OPCODE_SUB = 20,
-    BRIG_OPCODE_TRUNC = 21,
-    BRIG_OPCODE_MAD24 = 22,
-    BRIG_OPCODE_MAD24HI = 23,
-    BRIG_OPCODE_MUL24 = 24,
-    BRIG_OPCODE_MUL24HI = 25,
-    BRIG_OPCODE_SHL = 26,
-    BRIG_OPCODE_SHR = 27,
-    BRIG_OPCODE_AND = 28,
-    BRIG_OPCODE_NOT = 29,
-    BRIG_OPCODE_OR = 30,
-    BRIG_OPCODE_POPCOUNT = 31,
-    BRIG_OPCODE_XOR = 32,
-    BRIG_OPCODE_BITEXTRACT = 33,
-    BRIG_OPCODE_BITINSERT = 34,
-    BRIG_OPCODE_BITMASK = 35,
-    BRIG_OPCODE_BITREV = 36,
-    BRIG_OPCODE_BITSELECT = 37,
-    BRIG_OPCODE_FIRSTBIT = 38,
-    BRIG_OPCODE_LASTBIT = 39,
-    BRIG_OPCODE_COMBINE = 40,
-    BRIG_OPCODE_EXPAND = 41,
-    BRIG_OPCODE_LDA = 42,
-    BRIG_OPCODE_MOV = 43,
-    BRIG_OPCODE_SHUFFLE = 44,
-    BRIG_OPCODE_UNPACKHI = 45,
-    BRIG_OPCODE_UNPACKLO = 46,
-    BRIG_OPCODE_PACK = 47,
-    BRIG_OPCODE_UNPACK = 48,
-    BRIG_OPCODE_CMOV = 49,
-    BRIG_OPCODE_CLASS = 50,
-    BRIG_OPCODE_NCOS = 51,
-    BRIG_OPCODE_NEXP2 = 52,
-    BRIG_OPCODE_NFMA = 53,
-    BRIG_OPCODE_NLOG2 = 54,
-    BRIG_OPCODE_NRCP = 55,
-    BRIG_OPCODE_NRSQRT = 56,
-    BRIG_OPCODE_NSIN = 57,
-    BRIG_OPCODE_NSQRT = 58,
-    BRIG_OPCODE_BITALIGN = 59,
-    BRIG_OPCODE_BYTEALIGN = 60,
-    BRIG_OPCODE_PACKCVT = 61,
-    BRIG_OPCODE_UNPACKCVT = 62,
-    BRIG_OPCODE_LERP = 63,
-    BRIG_OPCODE_SAD = 64,
-    BRIG_OPCODE_SADHI = 65,
-    BRIG_OPCODE_SEGMENTP = 66,
-    BRIG_OPCODE_FTOS = 67,
-    BRIG_OPCODE_STOF = 68,
-    BRIG_OPCODE_CMP = 69,
-    BRIG_OPCODE_CVT = 70,
-    BRIG_OPCODE_LD = 71,
-    BRIG_OPCODE_ST = 72,
-    BRIG_OPCODE_ATOMIC = 73,
-    BRIG_OPCODE_ATOMICNORET = 74,
-    BRIG_OPCODE_SIGNAL = 75,
-    BRIG_OPCODE_SIGNALNORET = 76,
-    BRIG_OPCODE_MEMFENCE = 77,
-    BRIG_OPCODE_RDIMAGE = 78,
-    BRIG_OPCODE_LDIMAGE = 79,
-    BRIG_OPCODE_STIMAGE = 80,
-    BRIG_OPCODE_IMAGEFENCE = 81,
-    BRIG_OPCODE_QUERYIMAGE = 82,
-    BRIG_OPCODE_QUERYSAMPLER = 83,
-    BRIG_OPCODE_CBR = 84,
-    BRIG_OPCODE_BR = 85,
-    BRIG_OPCODE_SBR = 86,
-    BRIG_OPCODE_BARRIER = 87,
-    BRIG_OPCODE_WAVEBARRIER = 88,
-    BRIG_OPCODE_ARRIVEFBAR = 89,
-    BRIG_OPCODE_INITFBAR = 90,
-    BRIG_OPCODE_JOINFBAR = 91,
-    BRIG_OPCODE_LEAVEFBAR = 92,
-    BRIG_OPCODE_RELEASEFBAR = 93,
-    BRIG_OPCODE_WAITFBAR = 94,
-    BRIG_OPCODE_LDF = 95,
-    BRIG_OPCODE_ACTIVELANECOUNT = 96,
-    BRIG_OPCODE_ACTIVELANEID = 97,
-    BRIG_OPCODE_ACTIVELANEMASK = 98,
-    BRIG_OPCODE_ACTIVELANEPERMUTE = 99,
-    BRIG_OPCODE_CALL = 100,
-    BRIG_OPCODE_SCALL = 101,
-    BRIG_OPCODE_ICALL = 102,
-    BRIG_OPCODE_RET = 103,
-    BRIG_OPCODE_ALLOCA = 104,
-    BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
-    BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
-    BRIG_OPCODE_DIM = 107,
-    BRIG_OPCODE_GRIDGROUPS = 108,
-    BRIG_OPCODE_GRIDSIZE = 109,
-    BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
-    BRIG_OPCODE_PACKETID = 111,
-    BRIG_OPCODE_WORKGROUPID = 112,
-    BRIG_OPCODE_WORKGROUPSIZE = 113,
-    BRIG_OPCODE_WORKITEMABSID = 114,
-    BRIG_OPCODE_WORKITEMFLATABSID = 115,
-    BRIG_OPCODE_WORKITEMFLATID = 116,
-    BRIG_OPCODE_WORKITEMID = 117,
-    BRIG_OPCODE_CLEARDETECTEXCEPT = 118,
-    BRIG_OPCODE_GETDETECTEXCEPT = 119,
-    BRIG_OPCODE_SETDETECTEXCEPT = 120,
-    BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,
-    BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,
-    BRIG_OPCODE_LDQUEUEREADINDEX = 123,
-    BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,
-    BRIG_OPCODE_STQUEUEREADINDEX = 125,
-    BRIG_OPCODE_STQUEUEWRITEINDEX = 126,
-    BRIG_OPCODE_CLOCK = 127,
-    BRIG_OPCODE_CUID = 128,
-    BRIG_OPCODE_DEBUGTRAP = 129,
-    BRIG_OPCODE_GROUPBASEPTR = 130,
-    BRIG_OPCODE_KERNARGBASEPTR = 131,
-    BRIG_OPCODE_LANEID = 132,
-    BRIG_OPCODE_MAXCUID = 133,
-    BRIG_OPCODE_MAXWAVEID = 134,
-    BRIG_OPCODE_NULLPTR = 135,
-    BRIG_OPCODE_WAVEID = 136,
-
-    BRIG_OPCODE_FIRST_USER_DEFINED = 32768,
-};
-
-typedef uint8_t BrigPack8_t;
-enum BrigPack {
-    BRIG_PACK_NONE = 0,
-    BRIG_PACK_PP = 1,
-    BRIG_PACK_PS = 2,
-    BRIG_PACK_SP = 3,
-    BRIG_PACK_SS = 4,
-    BRIG_PACK_S = 5,
-    BRIG_PACK_P = 6,
-    BRIG_PACK_PPSAT = 7,
-    BRIG_PACK_PSSAT = 8,
-    BRIG_PACK_SPSAT = 9,
-    BRIG_PACK_SSSAT = 10,
-    BRIG_PACK_SSAT = 11,
-    BRIG_PACK_PSAT = 12
-};
-
-typedef uint8_t BrigProfile8_t;
-enum BrigProfile {
-    BRIG_PROFILE_BASE = 0,
-    BRIG_PROFILE_FULL = 1,
-};
-
-typedef uint16_t BrigRegisterKind16_t;
-enum BrigRegisterKind {
-    BRIG_REGISTER_KIND_CONTROL = 0,
-    BRIG_REGISTER_KIND_SINGLE = 1,
-    BRIG_REGISTER_KIND_DOUBLE = 2,
-    BRIG_REGISTER_KIND_QUAD = 3
-};
-
-typedef uint8_t BrigRound8_t;
-enum BrigRound {
-    BRIG_ROUND_NONE = 0,
-    BRIG_ROUND_FLOAT_DEFAULT = 1,
-    BRIG_ROUND_FLOAT_NEAR_EVEN = 2,
-    BRIG_ROUND_FLOAT_ZERO = 3,
-    BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,
-    BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,
-    BRIG_ROUND_INTEGER_NEAR_EVEN = 6,
-    BRIG_ROUND_INTEGER_ZERO = 7,
-    BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,
-    BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,
-    BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,
-    BRIG_ROUND_INTEGER_ZERO_SAT = 11,
-    BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,
-    BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,
-    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,
-    BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,
-    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,
-    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,
-    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,
-    BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,
-    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,
-    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21
-};
-
-typedef uint8_t BrigSamplerAddressing8_t;
-enum BrigSamplerAddressing {
-    BRIG_ADDRESSING_UNDEFINED = 0,
-    BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
-    BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
-    BRIG_ADDRESSING_REPEAT = 3,
-    BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
-
-    BRIG_ADDRESSING_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigSamplerCoordNormalization8_t;
-enum BrigSamplerCoordNormalization {
-    BRIG_COORD_UNNORMALIZED = 0,
-    BRIG_COORD_NORMALIZED = 1
-};
-
-typedef uint8_t BrigSamplerFilter8_t;
-enum BrigSamplerFilter {
-    BRIG_FILTER_NEAREST = 0,
-    BRIG_FILTER_LINEAR = 1,
-
-    BRIG_FILTER_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigSamplerQuery8_t;
-enum BrigSamplerQuery {
-    BRIG_SAMPLER_QUERY_ADDRESSING = 0,
-    BRIG_SAMPLER_QUERY_COORD = 1,
-    BRIG_SAMPLER_QUERY_FILTER = 2
-};
-
-typedef uint32_t BrigSectionIndex32_t;
-enum BrigSectionIndex {
-    BRIG_SECTION_INDEX_DATA = 0,
-    BRIG_SECTION_INDEX_CODE = 1,
-    BRIG_SECTION_INDEX_OPERAND = 2,
-
-    BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
-};
-
-typedef uint8_t BrigSegCvtModifier8_t;
-enum BrigSegCvtModifierMask {
-    BRIG_SEG_CVT_NONULL = 1
-};
-
-typedef uint8_t BrigSegment8_t;
-enum BrigSegment {
-    BRIG_SEGMENT_NONE = 0,
-    BRIG_SEGMENT_FLAT = 1,
-    BRIG_SEGMENT_GLOBAL = 2,
-    BRIG_SEGMENT_READONLY = 3,
-    BRIG_SEGMENT_KERNARG = 4,
-    BRIG_SEGMENT_GROUP = 5,
-    BRIG_SEGMENT_PRIVATE = 6,
-    BRIG_SEGMENT_SPILL = 7,
-    BRIG_SEGMENT_ARG = 8,
-
-    BRIG_SEGMENT_FIRST_USER_DEFINED = 128
-};
-
-enum {
-    BRIG_TYPE_BASE_SIZE  = 5,
-    BRIG_TYPE_PACK_SIZE  = 2,
-    BRIG_TYPE_ARRAY_SIZE = 1,
-
-    BRIG_TYPE_BASE_SHIFT  = 0,
-    BRIG_TYPE_PACK_SHIFT  = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
-    BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
-
-    BRIG_TYPE_BASE_MASK  = ((1 << BRIG_TYPE_BASE_SIZE)  - 1) << BRIG_TYPE_BASE_SHIFT,
-    BRIG_TYPE_PACK_MASK  = ((1 << BRIG_TYPE_PACK_SIZE)  - 1) << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT,
-
-    BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_PACK_32   = 1 << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_PACK_64   = 2 << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_PACK_128  = 3 << BRIG_TYPE_PACK_SHIFT,
-
-    BRIG_TYPE_ARRAY     = 1 << BRIG_TYPE_ARRAY_SHIFT
-};
-
-typedef uint16_t BrigType16_t;
-enum BrigType {
-    BRIG_TYPE_NONE  = 0,
-    BRIG_TYPE_U8    = 1,
-    BRIG_TYPE_U16   = 2,
-    BRIG_TYPE_U32   = 3,
-    BRIG_TYPE_U64   = 4,
-    BRIG_TYPE_S8    = 5,
-    BRIG_TYPE_S16   = 6,
-    BRIG_TYPE_S32   = 7,
-    BRIG_TYPE_S64   = 8,
-    BRIG_TYPE_F16   = 9,
-    BRIG_TYPE_F32   = 10,
-    BRIG_TYPE_F64   = 11,
-    BRIG_TYPE_B1    = 12,
-    BRIG_TYPE_B8    = 13,
-    BRIG_TYPE_B16   = 14,
-    BRIG_TYPE_B32   = 15,
-    BRIG_TYPE_B64   = 16,
-    BRIG_TYPE_B128  = 17,
-    BRIG_TYPE_SAMP  = 18,
-    BRIG_TYPE_ROIMG = 19,
-    BRIG_TYPE_WOIMG = 20,
-    BRIG_TYPE_RWIMG = 21,
-    BRIG_TYPE_SIG32 = 22,
-    BRIG_TYPE_SIG64 = 23,
-
-    BRIG_TYPE_U8X4  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_32,
-    BRIG_TYPE_U8X8  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_U8X16 = BRIG_TYPE_U8  | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
-    BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_S8X4  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_32,
-    BRIG_TYPE_S8X8  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_S8X16 = BRIG_TYPE_S8  | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
-    BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
-    BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
-    BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
-    BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128,
-
-    BRIG_TYPE_U8_ARRAY    = BRIG_TYPE_U8    | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U16_ARRAY   = BRIG_TYPE_U16   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U32_ARRAY   = BRIG_TYPE_U32   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U64_ARRAY   = BRIG_TYPE_U64   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S8_ARRAY    = BRIG_TYPE_S8    | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S16_ARRAY   = BRIG_TYPE_S16   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S32_ARRAY   = BRIG_TYPE_S32   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S64_ARRAY   = BRIG_TYPE_S64   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F16_ARRAY   = BRIG_TYPE_F16   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F32_ARRAY   = BRIG_TYPE_F32   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F64_ARRAY   = BRIG_TYPE_F64   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_B8_ARRAY    = BRIG_TYPE_B8    | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_B16_ARRAY   = BRIG_TYPE_B16   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_B32_ARRAY   = BRIG_TYPE_B32   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_B64_ARRAY   = BRIG_TYPE_B64   | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_B128_ARRAY  = BRIG_TYPE_B128  | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_SAMP_ARRAY  = BRIG_TYPE_SAMP  | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U8X4_ARRAY  = BRIG_TYPE_U8X4  | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U8X8_ARRAY  = BRIG_TYPE_U8X8  | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S8X4_ARRAY  = BRIG_TYPE_S8X4  | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S8X8_ARRAY  = BRIG_TYPE_S8X8  | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,
-    BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY,
-};
-
-typedef uint8_t BrigVariableModifier8_t;
-enum BrigVariableModifierMask {
-    BRIG_VARIABLE_DEFINITION = 1,
-    BRIG_VARIABLE_CONST = 2
-};
-
-typedef uint8_t BrigWidth8_t;
-enum BrigWidth {
-    BRIG_WIDTH_NONE = 0,
-    BRIG_WIDTH_1 = 1,
-    BRIG_WIDTH_2 = 2,
-    BRIG_WIDTH_4 = 3,
-    BRIG_WIDTH_8 = 4,
-    BRIG_WIDTH_16 = 5,
-    BRIG_WIDTH_32 = 6,
-    BRIG_WIDTH_64 = 7,
-    BRIG_WIDTH_128 = 8,
-    BRIG_WIDTH_256 = 9,
-    BRIG_WIDTH_512 = 10,
-    BRIG_WIDTH_1024 = 11,
-    BRIG_WIDTH_2048 = 12,
-    BRIG_WIDTH_4096 = 13,
-    BRIG_WIDTH_8192 = 14,
-    BRIG_WIDTH_16384 = 15,
-    BRIG_WIDTH_32768 = 16,
-    BRIG_WIDTH_65536 = 17,
-    BRIG_WIDTH_131072 = 18,
-    BRIG_WIDTH_262144 = 19,
-    BRIG_WIDTH_524288 = 20,
-    BRIG_WIDTH_1048576 = 21,
-    BRIG_WIDTH_2097152 = 22,
-    BRIG_WIDTH_4194304 = 23,
-    BRIG_WIDTH_8388608 = 24,
-    BRIG_WIDTH_16777216 = 25,
-    BRIG_WIDTH_33554432 = 26,
-    BRIG_WIDTH_67108864 = 27,
-    BRIG_WIDTH_134217728 = 28,
-    BRIG_WIDTH_268435456 = 29,
-    BRIG_WIDTH_536870912 = 30,
-    BRIG_WIDTH_1073741824 = 31,
-    BRIG_WIDTH_2147483648 = 32,
-    BRIG_WIDTH_WAVESIZE = 33,
-    BRIG_WIDTH_ALL = 34,
-};
-
-struct BrigUInt64 {
-    uint32_t lo;
-    uint32_t hi;
-};
-
-struct BrigBase {
-    uint16_t byteCount;
-    BrigKind16_t kind;
-};
-
-struct BrigData {
-    uint32_t byteCount;
-    uint8_t bytes[1];
-};
-
-struct BrigDirectiveArgBlock {
-    BrigBase base;
-};
-
-struct BrigDirectiveComment {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveControl {
-    BrigBase base;
-    BrigControlDirective16_t control;
-    uint16_t reserved;
-    BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveExecutable {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    uint16_t outArgCount;
-    uint16_t inArgCount;
-    BrigCodeOffset32_t firstInArg;
-    BrigCodeOffset32_t firstCodeBlockEntry;
-    BrigCodeOffset32_t nextModuleEntry;
-    BrigExecutableModifier8_t modifier;
-    BrigLinkage8_t linkage;
-    uint16_t reserved;
-};
-
-struct BrigDirectiveExtension {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveFbarrier {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    BrigVariableModifier8_t modifier;
-    BrigLinkage8_t linkage;
-    uint16_t reserved;
-};
-
-struct BrigDirectiveLabel {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveLoc {
-    BrigBase base;
-    BrigDataOffsetString32_t filename;
-    uint32_t line;
-    uint32_t column;
-};
-
-struct BrigDirectiveNone {
-    BrigBase base;
-};
-
-struct BrigDirectivePragma {
-    BrigBase base;
-    BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveVariable {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    BrigOperandOffset32_t init;
-    BrigType16_t type;
-    BrigSegment8_t segment;
-    BrigAlignment8_t align;
-    BrigUInt64 dim;
-    BrigVariableModifier8_t modifier;
-    BrigLinkage8_t linkage;
-    BrigAllocation8_t allocation;
-    uint8_t reserved;
-};
-
-struct BrigDirectiveModule {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    BrigVersion32_t hsailMajor;
-    BrigVersion32_t hsailMinor;
-    BrigProfile8_t profile;
-    BrigMachineModel8_t machineModel;
-    BrigRound8_t defaultFloatRound;
-    uint8_t reserved;
-};
-
-struct BrigInstBase {
-    BrigBase base;
-    BrigOpcode16_t opcode;
-    BrigType16_t type;
-    BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigInstAddr {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    uint8_t reserved[3];
-};
-
-struct BrigInstAtomic {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    BrigMemoryOrder8_t memoryOrder;
-    BrigMemoryScope8_t memoryScope;
-    BrigAtomicOperation8_t atomicOperation;
-    uint8_t equivClass;
-    uint8_t reserved[3];
-};
-
-struct BrigInstBasic {
-    BrigInstBase base;
-};
-
-struct BrigInstBr {
-    BrigInstBase base;
-    BrigWidth8_t width;
-    uint8_t reserved[3];
-};
-
-struct BrigInstCmp {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigAluModifier8_t modifier;
-    BrigCompareOperation8_t compare;
-    BrigPack8_t pack;
-    uint8_t reserved[3];
-};
-
-struct BrigInstCvt {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigAluModifier8_t modifier;
-    BrigRound8_t round;
-};
-
-struct BrigInstImage {
-    BrigInstBase base;
-    BrigType16_t imageType;
-    BrigType16_t coordType;
-    BrigImageGeometry8_t geometry;
-    uint8_t equivClass;
-    uint16_t reserved;
-};
-
-struct BrigInstLane {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigWidth8_t width;
-    uint8_t reserved;
-};
-
-struct BrigInstMem {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    BrigAlignment8_t align;
-    uint8_t equivClass;
-    BrigWidth8_t width;
-    BrigMemoryModifier8_t modifier;
-    uint8_t reserved[3];
-};
-
-struct BrigInstMemFence {
-    BrigInstBase base;
-    BrigMemoryOrder8_t memoryOrder;
-    BrigMemoryScope8_t globalSegmentMemoryScope;
-    BrigMemoryScope8_t groupSegmentMemoryScope;
-    BrigMemoryScope8_t imageSegmentMemoryScope;
-};
-
-struct BrigInstMod {
-    BrigInstBase base;
-    BrigAluModifier8_t modifier;
-    BrigRound8_t round;
-    BrigPack8_t pack;
-    uint8_t reserved;
-};
-
-struct BrigInstQueryImage {
-    BrigInstBase base;
-    BrigType16_t imageType;
-    BrigImageGeometry8_t geometry;
-    BrigImageQuery8_t query;
-};
-
-struct BrigInstQuerySampler {
-    BrigInstBase base;
-    BrigSamplerQuery8_t query;
-    uint8_t reserved[3];
-};
-
-struct BrigInstQueue {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    BrigMemoryOrder8_t memoryOrder;
-    uint16_t reserved;
-};
-
-struct BrigInstSeg {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    uint8_t reserved[3];
-};
-
-struct BrigInstSegCvt {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigSegment8_t segment;
-    BrigSegCvtModifier8_t modifier;
-};
-
-struct BrigInstSignal {
-    BrigInstBase base;
-    BrigType16_t signalType;
-    BrigMemoryOrder8_t memoryOrder;
-    BrigAtomicOperation8_t signalOperation;
-};
-
-struct BrigInstSourceType {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    uint16_t reserved;
-};
-
-struct BrigOperandAddress {
-    BrigBase base;
-    BrigCodeOffset32_t symbol;
-    BrigOperandOffset32_t reg;
-    BrigUInt64 offset;
-};
-
-struct BrigOperandAlign {
-    BrigBase base;
-    BrigAlignment8_t align;
-    uint8_t reserved[3];
-};
-
-struct BrigOperandCodeList {
-    BrigBase base;
-    BrigDataOffsetCodeList32_t elements;
-};
-
-struct BrigOperandCodeRef {
-    BrigBase base;
-    BrigCodeOffset32_t ref;
-};
-
-struct BrigOperandConstantBytes {
-    BrigBase base;
-    BrigType16_t type;
-    uint16_t reserved;
-    BrigDataOffsetString32_t bytes;
-};
-
-struct BrigOperandConstantOperandList {
-    BrigBase base;
-    BrigType16_t type;
-    uint16_t reserved;
-    BrigDataOffsetOperandList32_t elements;
-};
-
-struct BrigOperandConstantImage {
-    BrigBase base;
-    BrigType16_t type;
-    BrigImageGeometry8_t geometry;
-    BrigImageChannelOrder8_t channelOrder;
-    BrigImageChannelType8_t channelType;
-    uint8_t reserved[3];
-    BrigUInt64 width;
-    BrigUInt64 height;
-    BrigUInt64 depth;
-    BrigUInt64 array;
-};
-
-struct BrigOperandOperandList {
-    BrigBase base;
-    BrigDataOffsetOperandList32_t elements;
-};
-
-struct BrigOperandRegister {
-    BrigBase base;
-    BrigRegisterKind16_t regKind;
-    uint16_t regNum;
-};
-
-struct BrigOperandConstantSampler {
-    BrigBase base;
-    BrigType16_t type;
-    BrigSamplerCoordNormalization8_t coord;
-    BrigSamplerFilter8_t filter;
-    BrigSamplerAddressing8_t addressing;
-    uint8_t reserved[3];
-};
-
-struct BrigOperandString {
-    BrigBase base;
-    BrigDataOffsetString32_t string;
-};
-
-struct BrigOperandWavesize {
-    BrigBase base;
-};
-
-typedef uint32_t BrigExceptions32_t;
-enum BrigExceptionsMask {
-    BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
-    BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
-    BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
-    BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
-    BRIG_EXCEPTIONS_INEXACT = 1 << 4,
-
-    BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
-};
-
-struct BrigSectionHeader {
-    uint64_t byteCount;
-    uint32_t headerByteCount;
-    uint32_t nameLength;
-    uint8_t name[1];
-};
-
-struct BrigModuleHeader {
-    char identification[8];
-    BrigVersion32_t brigMajor;
-    BrigVersion32_t brigMinor;
-    uint64_t byteCount;
-    uint8_t hash[64];
-    uint32_t reserved;
-    uint32_t sectionCount;
-    uint64_t sectionIndex;
-};
-
-typedef BrigModuleHeader* BrigModule_t;
-
-#ifdef __cplusplus
-}
-#endif  /*__cplusplus*/
-
-#endif // defined(INCLUDED_BRIG_H)
diff --git a/third_party/rocm/include/hsa/amd_hsa_common.h b/third_party/rocm/include/hsa/amd_hsa_common.h
deleted file mode 100644
index 7c4ed3e..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_common.h
+++ /dev/null
@@ -1,91 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-// 
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-// 
-// Developed by:
-// 
-//                 AMD Research and AMD HSA Software Development
-// 
-//                 Advanced Micro Devices, Inc.
-// 
-//                 www.amd.com
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-// 
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// The following set of header files provides definitions for AMD GPU
-// Architecture:
-//   - amd_hsa_common.h
-//   - amd_hsa_elf.h
-//   - amd_hsa_kernel_code.h
-//   - amd_hsa_queue.h
-//   - amd_hsa_signal.h
-//
-// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
-// information.
-
-#ifndef AMD_HSA_COMMON_H
-#define AMD_HSA_COMMON_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-// Descriptive version of the HSA Application Binary Interface.
-#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
-
-// Alignment attribute that specifies a minimum alignment (in bytes) for
-// variables of the specified type.
-#if defined(__GNUC__)
-#  define __ALIGNED__(x) __attribute__((aligned(x)))
-#elif defined(_MSC_VER)
-#  define __ALIGNED__(x) __declspec(align(x))
-#elif defined(RC_INVOKED)
-#  define __ALIGNED__(x)
-#else
-#  error
-#endif
-
-// Creates enumeration entries for packed types. Enumeration entries include
-// bit shift amount, bit width, and bit mask.
-#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width)                   \
-  name##_SHIFT = (shift),                                                      \
-  name##_WIDTH = (width),                                                      \
-  name = (((1 << (width)) - 1) << (shift))                                     \
-
-// Gets bits for specified mask from specified src packed instance.
-#define AMD_HSA_BITS_GET(src, mask)                                            \
-  ((src & mask) >> mask ## _SHIFT)                                             \
-
-// Sets val bits for specified mask in specified dst packed instance.
-#define AMD_HSA_BITS_SET(dst, mask, val)                                       \
-  dst &= (~(1 << mask##_SHIFT) & ~mask);                                       \
-  dst |= (((val) << mask##_SHIFT) & mask)                                      \
-
-#endif // AMD_HSA_COMMON_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_elf.h b/third_party/rocm/include/hsa/amd_hsa_elf.h
deleted file mode 100644
index adcdec4..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_elf.h
+++ /dev/null
@@ -1,416 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-// 
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-// 
-// Developed by:
-// 
-//                 AMD Research and AMD HSA Software Development
-// 
-//                 Advanced Micro Devices, Inc.
-// 
-//                 www.amd.com
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-// 
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// Undefine the macro in case it is defined in the system elf.h.
-#undef EM_AMDGPU
-
-#ifndef AMD_HSA_ELF_H
-#define AMD_HSA_ELF_H
-
-// AMD GPU Specific ELF Header Enumeration Values.
-//
-// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
-// code object V1 defintions which are not part of the LLVM header. Code object
-// V1 was only supported by the Finalizer which is now deprecated and removed.
-//
-// TODO: Deprecate and remove V1 support and replace this header with using the
-// LLVM header.
-namespace ELF {
-
-// Machine architectures
-// See current registered ELF machine architectures at:
-//    http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
-enum {
-  EM_AMDGPU = 224,        // AMD GPU architecture
-};
-
-// OS ABI identification.
-enum {
-  ELFOSABI_AMDGPU_HSA = 64,    // AMD HSA runtime
-};
-
-// AMDGPU OS ABI Version identification.
-enum {
-  // ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
-  // was never defined for V1.
-  ELFABIVERSION_AMDGPU_HSA_V2 = 0,
-  ELFABIVERSION_AMDGPU_HSA_V3 = 1,
-  ELFABIVERSION_AMDGPU_HSA_V4 = 2
-};
-
-// AMDGPU specific e_flags.
-enum : unsigned {
-  // Processor selection mask for EF_AMDGPU_MACH_* values.
-  EF_AMDGPU_MACH = 0x0ff,
-
-  // Not specified processor.
-  EF_AMDGPU_MACH_NONE = 0x000,
-
-  // AMDGCN-based processors.
-  EF_AMDGPU_MACH_AMDGCN_GFX600        = 0x020,
-  EF_AMDGPU_MACH_AMDGCN_GFX601        = 0x021,
-  EF_AMDGPU_MACH_AMDGCN_GFX700        = 0x022,
-  EF_AMDGPU_MACH_AMDGCN_GFX701        = 0x023,
-  EF_AMDGPU_MACH_AMDGCN_GFX702        = 0x024,
-  EF_AMDGPU_MACH_AMDGCN_GFX703        = 0x025,
-  EF_AMDGPU_MACH_AMDGCN_GFX704        = 0x026,
-  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
-  EF_AMDGPU_MACH_AMDGCN_GFX801        = 0x028,
-  EF_AMDGPU_MACH_AMDGCN_GFX802        = 0x029,
-  EF_AMDGPU_MACH_AMDGCN_GFX803        = 0x02a,
-  EF_AMDGPU_MACH_AMDGCN_GFX810        = 0x02b,
-  EF_AMDGPU_MACH_AMDGCN_GFX900        = 0x02c,
-  EF_AMDGPU_MACH_AMDGCN_GFX902        = 0x02d,
-  EF_AMDGPU_MACH_AMDGCN_GFX904        = 0x02e,
-  EF_AMDGPU_MACH_AMDGCN_GFX906        = 0x02f,
-  EF_AMDGPU_MACH_AMDGCN_GFX908        = 0x030,
-  EF_AMDGPU_MACH_AMDGCN_GFX909        = 0x031,
-  EF_AMDGPU_MACH_AMDGCN_GFX90C        = 0x032,
-  EF_AMDGPU_MACH_AMDGCN_GFX1010       = 0x033,
-  EF_AMDGPU_MACH_AMDGCN_GFX1011       = 0x034,
-  EF_AMDGPU_MACH_AMDGCN_GFX1012       = 0x035,
-  EF_AMDGPU_MACH_AMDGCN_GFX1030       = 0x036,
-  EF_AMDGPU_MACH_AMDGCN_GFX1031       = 0x037,
-  EF_AMDGPU_MACH_AMDGCN_GFX1032       = 0x038,
-  EF_AMDGPU_MACH_AMDGCN_GFX1033       = 0x039,
-  EF_AMDGPU_MACH_AMDGCN_GFX602        = 0x03a,
-  EF_AMDGPU_MACH_AMDGCN_GFX705        = 0x03b,
-  EF_AMDGPU_MACH_AMDGCN_GFX805        = 0x03c,
-
-  // First/last AMDGCN-based processors.
-  EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
-  EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX805,
-
-  // Indicates if the "xnack" target feature is enabled for all code contained
-  // in the object.
-  //
-  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
-  EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
-  // Indicates if the trap handler is enabled for all code contained
-  // in the object.
-  //
-  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
-  EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
-
-  // Indicates if the "xnack" target feature is enabled for all code contained
-  // in the object.
-  //
-  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
-  EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
-  // Indicates if the "sramecc" target feature is enabled for all code
-  // contained in the object.
-  //
-  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
-  EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
-
-  // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
-  //
-  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
-  EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
-  // XNACK is not supported.
-  EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
-  // XNACK is any/default/unspecified.
-  EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
-  // XNACK is off.
-  EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
-  // XNACK is on.
-  EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
-
-  // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
-  //
-  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
-  EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
-  // SRAMECC is not supported.
-  EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
-  // SRAMECC is any/default/unspecified.
-  EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
-  // SRAMECC is off.
-  EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
-  // SRAMECC is on.
-  EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
-};
-
-} // end namespace ELF
-
-// ELF Section Header Flag Enumeration Values.
-#define SHF_AMDGPU_HSA_GLOBAL   (0x00100000 & SHF_MASKOS)
-#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
-#define SHF_AMDGPU_HSA_CODE     (0x00400000 & SHF_MASKOS)
-#define SHF_AMDGPU_HSA_AGENT    (0x00800000 & SHF_MASKOS)
-
-//
-typedef enum {
-  AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
-  AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
-  AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
-  AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
-  AMDGPU_HSA_SEGMENT_LAST,
-} amdgpu_hsa_elf_segment_t;
-
-// ELF Program Header Type Enumeration Values.
-#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
-#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT   (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
-#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
-#define PT_AMDGPU_HSA_LOAD_CODE_AGENT     (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
-
-// ELF Symbol Type Enumeration Values.
-#define STT_AMDGPU_HSA_KERNEL            (STT_LOOS + 0)
-#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
-#define STT_AMDGPU_HSA_METADATA          (STT_LOOS + 2)
-
-// ELF Symbol Binding Enumeration Values.
-#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
-
-// ELF Symbol Other Information Creation/Retrieval.
-#define ELF64_ST_AMDGPU_ALLOCATION(o)  (((o) >> 2) & 0x3)
-#define ELF64_ST_AMDGPU_FLAGS(o)       ((o) >> 4)
-#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
-
-typedef enum {
-  AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
-  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
-  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
-  AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
-  AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
-} amdgpu_hsa_symbol_allocation_t;
-
-// ELF Symbol Allocation Enumeration Values.
-#define STA_AMDGPU_HSA_DEFAULT        AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
-#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
-#define STA_AMDGPU_HSA_GLOBAL_AGENT   AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
-#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
-
-typedef enum {
-  AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
-  AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
-  AMDGPU_HSA_SYMBOL_FLAG_LAST,
-} amdgpu_hsa_symbol_flag_t;
-
-// ELF Symbol Flag Enumeration Values.
-#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
-
-// AMD GPU Relocation Type Enumeration Values.
-#define R_AMDGPU_NONE         0
-#define R_AMDGPU_32_LOW       1
-#define R_AMDGPU_32_HIGH      2
-#define R_AMDGPU_64           3
-#define R_AMDGPU_INIT_SAMPLER 4
-#define R_AMDGPU_INIT_IMAGE   5
-#define R_AMDGPU_RELATIVE64   13
-
-// AMD GPU Note Type Enumeration Values.
-#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
-#define NT_AMD_HSA_HSAIL               2
-#define NT_AMD_HSA_ISA_VERSION         3
-#define NT_AMD_HSA_PRODUCER            4
-#define NT_AMD_HSA_PRODUCER_OPTIONS    5
-#define NT_AMD_HSA_EXTENSION           6
-#define NT_AMD_HSA_ISA_NAME            11
-#define NT_AMD_HSA_HLDEBUG_DEBUG       101
-#define NT_AMD_HSA_HLDEBUG_TARGET      102
-
-// AMD GPU Metadata Kind Enumeration Values.
-typedef uint16_t amdgpu_hsa_metadata_kind16_t;
-typedef enum {
-  AMDGPU_HSA_METADATA_KIND_NONE = 0,
-  AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
-  AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
-  AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
-  AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
-} amdgpu_hsa_metadata_kind_t;
-
-// AMD GPU Sampler Coordinate Normalization Enumeration Values.
-typedef uint8_t amdgpu_hsa_sampler_coord8_t;
-typedef enum {
-  AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
-  AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
-} amdgpu_hsa_sampler_coord_t;
-
-// AMD GPU Sampler Filter Enumeration Values.
-typedef uint8_t amdgpu_hsa_sampler_filter8_t;
-typedef enum {
-  AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
-  AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
-} amdgpu_hsa_sampler_filter_t;
-
-// AMD GPU Sampler Addressing Enumeration Values.
-typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
-typedef enum {
-  AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
-  AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
-  AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
-  AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
-  AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
-} amdgpu_hsa_sampler_addressing_t;
-
-// AMD GPU Sampler Descriptor.
-typedef struct amdgpu_hsa_sampler_descriptor_s {
-  uint16_t size;
-  amdgpu_hsa_metadata_kind16_t kind;
-  amdgpu_hsa_sampler_coord8_t coord;
-  amdgpu_hsa_sampler_filter8_t filter;
-  amdgpu_hsa_sampler_addressing8_t addressing;
-  uint8_t reserved1;
-} amdgpu_hsa_sampler_descriptor_t;
-
-// AMD GPU Image Geometry Enumeration Values.
-typedef uint8_t amdgpu_hsa_image_geometry8_t;
-typedef enum {
-  AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
-  AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
-  AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
-  AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
-  AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
-  AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
-  AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
-  AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
-} amdgpu_hsa_image_geometry_t;
-
-// AMD GPU Image Channel Order Enumeration Values.
-typedef uint8_t amdgpu_hsa_image_channel_order8_t;
-typedef enum {
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
-  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
-} amdgpu_hsa_image_channel_order_t;
-
-// AMD GPU Image Channel Type Enumeration Values.
-typedef uint8_t amdgpu_hsa_image_channel_type8_t;
-typedef enum {
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
-  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
-} amdgpu_hsa_image_channel_type_t;
-
-// AMD GPU Image Descriptor.
-typedef struct amdgpu_hsa_image_descriptor_s {
-  uint16_t size;
-  amdgpu_hsa_metadata_kind16_t kind;
-  amdgpu_hsa_image_geometry8_t geometry;
-  amdgpu_hsa_image_channel_order8_t channel_order;
-  amdgpu_hsa_image_channel_type8_t channel_type;
-  uint8_t reserved1;
-  uint64_t width;
-  uint64_t height;
-  uint64_t depth;
-  uint64_t array;
-} amdgpu_hsa_image_descriptor_t;
-
-typedef struct amdgpu_hsa_note_code_object_version_s {
-  uint32_t major_version;
-  uint32_t minor_version;
-} amdgpu_hsa_note_code_object_version_t;
-
-typedef struct amdgpu_hsa_note_hsail_s {
-  uint32_t hsail_major_version;
-  uint32_t hsail_minor_version;
-  uint8_t profile;
-  uint8_t machine_model;
-  uint8_t default_float_round;
-} amdgpu_hsa_note_hsail_t;
-
-typedef struct amdgpu_hsa_note_isa_s {
-  uint16_t vendor_name_size;
-  uint16_t architecture_name_size;
-  uint32_t major;
-  uint32_t minor;
-  uint32_t stepping;
-  char vendor_and_architecture_name[1];
-} amdgpu_hsa_note_isa_t;
-
-typedef struct amdgpu_hsa_note_producer_s {
-  uint16_t producer_name_size;
-  uint16_t reserved;
-  uint32_t producer_major_version;
-  uint32_t producer_minor_version;
-  char producer_name[1];
-} amdgpu_hsa_note_producer_t;
-
-typedef struct amdgpu_hsa_note_producer_options_s {
-  uint16_t producer_options_size;
-  char producer_options[1];
-} amdgpu_hsa_note_producer_options_t;
-
-typedef enum {
-  AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
-  AMDGPU_HSA_RODATA_GLOBAL_AGENT,
-  AMDGPU_HSA_RODATA_READONLY_AGENT,
-  AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
-  AMDGPU_HSA_DATA_GLOBAL_AGENT,
-  AMDGPU_HSA_DATA_READONLY_AGENT,
-  AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
-  AMDGPU_HSA_BSS_GLOBAL_AGENT,
-  AMDGPU_HSA_BSS_READONLY_AGENT,
-  AMDGPU_HSA_SECTION_LAST,
-} amdgpu_hsa_elf_section_t;
-
-#endif // AMD_HSA_ELF_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_kernel_code.h b/third_party/rocm/include/hsa/amd_hsa_kernel_code.h
deleted file mode 100644
index 901e49c..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_kernel_code.h
+++ /dev/null
@@ -1,269 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-// 
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-// 
-// Developed by:
-// 
-//                 AMD Research and AMD HSA Software Development
-// 
-//                 Advanced Micro Devices, Inc.
-// 
-//                 www.amd.com
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-// 
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef AMD_HSA_KERNEL_CODE_H
-#define AMD_HSA_KERNEL_CODE_H
-
-#include "amd_hsa_common.h"
-#include "hsa.h"
-
-// AMD Kernel Code Version Enumeration Values.
-typedef uint32_t amd_kernel_code_version32_t;
-enum amd_kernel_code_version_t {
-  AMD_KERNEL_CODE_VERSION_MAJOR = 1,
-  AMD_KERNEL_CODE_VERSION_MINOR = 1
-};
-
-// AMD Machine Kind Enumeration Values.
-typedef uint16_t amd_machine_kind16_t;
-enum amd_machine_kind_t {
-  AMD_MACHINE_KIND_UNDEFINED = 0,
-  AMD_MACHINE_KIND_AMDGPU = 1
-};
-
-// AMD Machine Version.
-typedef uint16_t amd_machine_version16_t;
-
-// AMD Float Round Mode Enumeration Values.
-enum amd_float_round_mode_t {
-  AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
-  AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
-  AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
-  AMD_FLOAT_ROUND_MODE_ZERO = 3
-};
-
-// AMD Float Denorm Mode Enumeration Values.
-enum amd_float_denorm_mode_t {
-  AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
-  AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
-  AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
-  AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
-};
-
-// AMD Compute Program Resource Register One.
-typedef uint32_t amd_compute_pgm_rsrc_one32_t;
-enum amd_compute_pgm_rsrc_one_t {
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
-};
-
-// AMD System VGPR Workitem ID Enumeration Values.
-enum amd_system_vgpr_workitem_id_t {
-  AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
-  AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
-  AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
-  AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
-};
-
-// AMD Compute Program Resource Register Two.
-typedef uint32_t amd_compute_pgm_rsrc_two32_t;
-enum amd_compute_pgm_rsrc_two_t {
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
-};
-
-// AMD Element Byte Size Enumeration Values.
-enum amd_element_byte_size_t {
-  AMD_ELEMENT_BYTE_SIZE_2 = 0,
-  AMD_ELEMENT_BYTE_SIZE_4 = 1,
-  AMD_ELEMENT_BYTE_SIZE_8 = 2,
-  AMD_ELEMENT_BYTE_SIZE_16 = 3
-};
-
-// AMD Kernel Code Properties.
-typedef uint32_t amd_kernel_code_properties32_t;
-enum amd_kernel_code_properties_t {
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 10, 6),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
-};
-
-// AMD Power Of Two Enumeration Values.
-typedef uint8_t amd_powertwo8_t;
-enum amd_powertwo_t {
-  AMD_POWERTWO_1 = 0,
-  AMD_POWERTWO_2 = 1,
-  AMD_POWERTWO_4 = 2,
-  AMD_POWERTWO_8 = 3,
-  AMD_POWERTWO_16 = 4,
-  AMD_POWERTWO_32 = 5,
-  AMD_POWERTWO_64 = 6,
-  AMD_POWERTWO_128 = 7,
-  AMD_POWERTWO_256 = 8
-};
-
-// AMD Enabled Control Directive Enumeration Values.
-typedef uint64_t amd_enabled_control_directive64_t;
-enum amd_enabled_control_directive_t {
-  AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
-  AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
-  AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
-  AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
-  AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
-  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
-  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
-  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
-  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
-};
-
-// AMD Exception Kind Enumeration Values.
-typedef uint16_t amd_exception_kind16_t;
-enum amd_exception_kind_t {
-  AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
-  AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
-  AMD_EXCEPTION_KIND_OVERFLOW = 4,
-  AMD_EXCEPTION_KIND_UNDERFLOW = 8,
-  AMD_EXCEPTION_KIND_INEXACT = 16
-};
-
-// AMD Control Directives.
-#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
-#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
-typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
-  amd_enabled_control_directive64_t enabled_control_directives;
-  uint16_t enable_break_exceptions;
-  uint16_t enable_detect_exceptions;
-  uint32_t max_dynamic_group_size;
-  uint64_t max_flat_grid_size;
-  uint32_t max_flat_workgroup_size;
-  uint8_t required_dim;
-  uint8_t reserved1[3];
-  uint64_t required_grid_size[3];
-  uint32_t required_workgroup_size[3];
-  uint8_t reserved2[60];
-} amd_control_directives_t;
-
-// AMD Kernel Code.
-#define AMD_ISA_ALIGN_BYTES 256
-#define AMD_KERNEL_CODE_ALIGN_BYTES 64
-#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
-typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
-  amd_kernel_code_version32_t amd_kernel_code_version_major;
-  amd_kernel_code_version32_t amd_kernel_code_version_minor;
-  amd_machine_kind16_t amd_machine_kind;
-  amd_machine_version16_t amd_machine_version_major;
-  amd_machine_version16_t amd_machine_version_minor;
-  amd_machine_version16_t amd_machine_version_stepping;
-  int64_t kernel_code_entry_byte_offset;
-  int64_t kernel_code_prefetch_byte_offset;
-  uint64_t kernel_code_prefetch_byte_size;
-  uint64_t max_scratch_backing_memory_byte_size;
-  amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
-  amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
-  amd_kernel_code_properties32_t kernel_code_properties;
-  uint32_t workitem_private_segment_byte_size;
-  uint32_t workgroup_group_segment_byte_size;
-  uint32_t gds_segment_byte_size;
-  uint64_t kernarg_segment_byte_size;
-  uint32_t workgroup_fbarrier_count;
-  uint16_t wavefront_sgpr_count;
-  uint16_t workitem_vgpr_count;
-  uint16_t reserved_vgpr_first;
-  uint16_t reserved_vgpr_count;
-  uint16_t reserved_sgpr_first;
-  uint16_t reserved_sgpr_count;
-  uint16_t debug_wavefront_private_segment_offset_sgpr;
-  uint16_t debug_private_segment_buffer_sgpr;
-  amd_powertwo8_t kernarg_segment_alignment;
-  amd_powertwo8_t group_segment_alignment;
-  amd_powertwo8_t private_segment_alignment;
-  amd_powertwo8_t wavefront_size;
-  int32_t call_convention;
-  uint8_t reserved1[12];
-  uint64_t runtime_loader_kernel_symbol;
-  amd_control_directives_t control_directives;
-} amd_kernel_code_t;
-
-// TODO: this struct should be completely gone once debugger designs/implements
-// Debugger APIs.
-typedef struct amd_runtime_loader_debug_info_s {
-  const void* elf_raw;
-  size_t elf_size;
-  const char *kernel_name;
-  const void *owning_segment;
-} amd_runtime_loader_debug_info_t;
-
-#endif // AMD_HSA_KERNEL_CODE_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_queue.h b/third_party/rocm/include/hsa/amd_hsa_queue.h
deleted file mode 100644
index 8675ec4..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_queue.h
+++ /dev/null
@@ -1,87 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-//                 AMD Research and AMD HSA Software Development
-//
-//                 Advanced Micro Devices, Inc.
-//
-//                 www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef AMD_HSA_QUEUE_H
-#define AMD_HSA_QUEUE_H
-
-#include "amd_hsa_common.h"
-#include "hsa.h"
-
-// AMD Queue Properties.
-typedef uint32_t amd_queue_properties32_t;
-enum amd_queue_properties_t {
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
-  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
-};
-
-// AMD Queue.
-#define AMD_QUEUE_ALIGN_BYTES 64
-#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
-typedef struct AMD_QUEUE_ALIGN amd_queue_s {
-  hsa_queue_t hsa_queue;
-  uint32_t reserved1[4];
-  volatile uint64_t write_dispatch_id;
-  uint32_t group_segment_aperture_base_hi;
-  uint32_t private_segment_aperture_base_hi;
-  uint32_t max_cu_id;
-  uint32_t max_wave_id;
-  volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
-  volatile uint32_t legacy_doorbell_lock;
-  uint32_t reserved2[9];
-  volatile uint64_t read_dispatch_id;
-  uint32_t read_dispatch_id_field_base_byte_offset;
-  uint32_t compute_tmpring_size;
-  uint32_t scratch_resource_descriptor[4];
-  uint64_t scratch_backing_memory_location;
-  uint64_t scratch_backing_memory_byte_size;
-  uint32_t scratch_wave64_lane_byte_size;
-  amd_queue_properties32_t queue_properties;
-  uint32_t reserved3[2];
-  hsa_signal_t queue_inactive_signal;
-  uint32_t reserved4[14];
-} amd_queue_t;
-
-#endif // AMD_HSA_QUEUE_H
diff --git a/third_party/rocm/include/hsa/amd_hsa_signal.h b/third_party/rocm/include/hsa/amd_hsa_signal.h
deleted file mode 100644
index f9d721f..0000000
--- a/third_party/rocm/include/hsa/amd_hsa_signal.h
+++ /dev/null
@@ -1,80 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-// 
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-// 
-// Developed by:
-// 
-//                 AMD Research and AMD HSA Software Development
-// 
-//                 Advanced Micro Devices, Inc.
-// 
-//                 www.amd.com
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-// 
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef AMD_HSA_SIGNAL_H
-#define AMD_HSA_SIGNAL_H
-
-#include "amd_hsa_common.h"
-#include "amd_hsa_queue.h"
-
-// AMD Signal Kind Enumeration Values.
-typedef int64_t amd_signal_kind64_t;
-enum amd_signal_kind_t {
-  AMD_SIGNAL_KIND_INVALID = 0,
-  AMD_SIGNAL_KIND_USER = 1,
-  AMD_SIGNAL_KIND_DOORBELL = -1,
-  AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
-};
-
-// AMD Signal.
-#define AMD_SIGNAL_ALIGN_BYTES 64
-#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
-typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
-  amd_signal_kind64_t kind;
-  union {
-    volatile int64_t value;
-    volatile uint32_t* legacy_hardware_doorbell_ptr;
-    volatile uint64_t* hardware_doorbell_ptr;
-  };
-  uint64_t event_mailbox_ptr;
-  uint32_t event_id;
-  uint32_t reserved1;
-  uint64_t start_ts;
-  uint64_t end_ts;
-  union {
-    amd_queue_t* queue_ptr;
-    uint64_t reserved2;
-  };
-  uint32_t reserved3[2];
-} amd_signal_t;
-
-#endif // AMD_HSA_SIGNAL_H
diff --git a/third_party/rocm/include/hsa/hsa.h b/third_party/rocm/include/hsa/hsa.h
deleted file mode 100644
index d8fdd47..0000000
--- a/third_party/rocm/include/hsa/hsa.h
+++ /dev/null
@@ -1,5660 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-//                 AMD Research and AMD HSA Software Development
-//
-//                 Advanced Micro Devices, Inc.
-//
-//                 www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_RUNTIME_INC_HSA_H_
-#define HSA_RUNTIME_INC_HSA_H_
-
-#include <stddef.h>   /* size_t */
-#include <stdint.h>   /* uintXX_t */
-
-#ifndef __cplusplus
-#include <stdbool.h>  /* bool */
-#endif /* __cplusplus */
-
-// Placeholder for calling convention and import/export macros
-#ifndef HSA_CALL
-#define HSA_CALL
-#endif
-
-#ifndef HSA_EXPORT_DECORATOR
-#ifdef __GNUC__
-#define HSA_EXPORT_DECORATOR __attribute__ ((visibility ("default")))
-#else
-#define HSA_EXPORT_DECORATOR
-#endif
-#endif
-#define HSA_API_EXPORT HSA_EXPORT_DECORATOR HSA_CALL
-#define HSA_API_IMPORT HSA_CALL
-
-#if !defined(HSA_API) && defined(HSA_EXPORT)
-#define HSA_API HSA_API_EXPORT
-#else
-#define HSA_API HSA_API_IMPORT
-#endif
-
-// Detect and set large model builds.
-#undef HSA_LARGE_MODEL
-#if defined(__LP64__) || defined(_M_X64)
-#define HSA_LARGE_MODEL
-#endif
-
-// Try to detect CPU endianness
-#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU)
-#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
-    defined(_M_X64)
-#define LITTLEENDIAN_CPU
-#endif
-#endif
-
-#undef HSA_LITTLE_ENDIAN
-#if defined(LITTLEENDIAN_CPU)
-#define HSA_LITTLE_ENDIAN
-#elif defined(BIGENDIAN_CPU)
-#else
-#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
-#endif
-
-#ifndef HSA_DEPRECATED
-#define HSA_DEPRECATED
-//#ifdef __GNUC__
-//#define HSA_DEPRECATED __attribute__((deprecated))
-//#else
-//#define HSA_DEPRECATED __declspec(deprecated)
-//#endif
-#endif
-
-#define HSA_VERSION_1_0                              1
-
-#ifdef __cplusplus
-extern "C" {
-#endif  /* __cplusplus */
-
-/** \defgroup status Runtime Notifications
- *  @{
- */
-
-/**
- * @brief Status codes.
- */
-typedef enum {
-  /**
-   * The function has been executed successfully.
-   */
-  HSA_STATUS_SUCCESS = 0x0,
-  /**
-   * A traversal over a list of elements has been interrupted by the
-   * application before completing.
-   */
-  HSA_STATUS_INFO_BREAK = 0x1,
-  /**
-   * A generic error has occurred.
-   */
-  HSA_STATUS_ERROR = 0x1000,
-  /**
-   * One of the actual arguments does not meet a precondition stated in the
-   * documentation of the corresponding formal argument.
-   */
-  HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001,
-  /**
-   * The requested queue creation is not valid.
-   */
-  HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002,
-  /**
-   * The requested allocation is not valid.
-   */
-  HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003,
-  /**
-   * The agent is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_AGENT = 0x1004,
-  /**
-   * The memory region is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_REGION = 0x1005,
-  /**
-   * The signal is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006,
-  /**
-   * The queue is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007,
-  /**
-   * The HSA runtime failed to allocate the necessary resources. This error
-   * may also occur when the HSA runtime needs to spawn threads or create
-   * internal OS-specific events.
-   */
-  HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008,
-  /**
-   * The AQL packet is malformed.
-   */
-  HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009,
-  /**
-   * An error has been detected while releasing a resource.
-   */
-  HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A,
-  /**
-   * An API other than ::hsa_init has been invoked while the reference count
-   * of the HSA runtime is 0.
-   */
-  HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B,
-  /**
-   * The maximum reference count for the object has been reached.
-   */
-  HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C,
-  /**
-   * The arguments passed to a functions are not compatible.
-   */
-  HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D,
-  /**
-   * The index is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_INDEX = 0x100E,
-  /**
-   * The instruction set architecture is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_ISA = 0x100F,
-  /**
-   * The instruction set architecture name is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017,
-  /**
-   * The code object is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010,
-  /**
-   * The executable is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011,
-  /**
-   * The executable is frozen.
-   */
-  HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012,
-  /**
-   * There is no symbol with the given name.
-   */
-  HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013,
-  /**
-   * The variable is already defined.
-   */
-  HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014,
-  /**
-   * The variable is undefined.
-   */
-  HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015,
-  /**
-   * An HSAIL operation resulted in a hardware exception.
-   */
-  HSA_STATUS_ERROR_EXCEPTION = 0x1016,
-  /**
-   * The code object symbol is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_CODE_SYMBOL = 0x1018,
-  /**
-   * The executable symbol is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL = 0x1019,
-  /**
-   * The file descriptor is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_FILE = 0x1020,
-  /**
-   * The code object reader is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER = 0x1021,
-  /**
-   * The cache is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_CACHE = 0x1022,
-  /**
-   * The wavefront is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_WAVEFRONT = 0x1023,
-  /**
-   * The signal group is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP = 0x1024,
-  /**
-   * The HSA runtime is not in the configuration state.
-   */
-  HSA_STATUS_ERROR_INVALID_RUNTIME_STATE = 0x1025,
-  /**
-  * The queue received an error that may require process termination.
-  */
-  HSA_STATUS_ERROR_FATAL = 0x1026
-} hsa_status_t;
-
-/**
- * @brief Query additional information about a status code.
- *
- * @param[in] status Status code.
- *
- * @param[out] status_string A NUL-terminated string that describes the error
- * status.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p status is an invalid
- * status code, or @p status_string is NULL.
- */
-hsa_status_t HSA_API hsa_status_string(
-    hsa_status_t status,
-    const char ** status_string);
-
-/** @} */
-
-/** \defgroup common Common Definitions
- *  @{
- */
-
-/**
- * @brief Three-dimensional coordinate.
- */
-typedef struct hsa_dim3_s {
-  /**
-   * X dimension.
-   */
-   uint32_t x;
-
-  /**
-   * Y dimension.
-   */
-   uint32_t y;
-
-   /**
-    * Z dimension.
-    */
-   uint32_t z;
-} hsa_dim3_t;
-
-/**
- * @brief Access permissions.
- */
-typedef enum {
-  /**
-   * Read-only access.
-   */
-  HSA_ACCESS_PERMISSION_RO = 1,
-  /**
-   * Write-only access.
-   */
-  HSA_ACCESS_PERMISSION_WO = 2,
-  /**
-   * Read and write access.
-   */
-  HSA_ACCESS_PERMISSION_RW = 3
-} hsa_access_permission_t;
-
-/**
- * @brief POSIX file descriptor.
- */
-typedef int hsa_file_t;
-
-/** @} **/
-
-
-/** \defgroup initshutdown Initialization and Shut Down
- *  @{
- */
-
-/**
- * @brief Initialize the HSA runtime.
- *
- * @details Initializes the HSA runtime if it is not already initialized, and
- * increases the reference counter associated with the HSA runtime for the
- * current process. Invocation of any HSA function other than ::hsa_init results
- * in undefined behavior if the current HSA runtime reference counter is less
- * than one.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_REFCOUNT_OVERFLOW The HSA runtime reference
- * count reaches INT32_MAX.
- */
-hsa_status_t HSA_API hsa_init();
-
-/**
- * @brief Shut down the HSA runtime.
- *
- * @details Decreases the reference count of the HSA runtime instance. When the
- * reference count reaches 0, the HSA runtime is no longer considered valid
- * but the application might call ::hsa_init to initialize the HSA runtime
- * again.
- *
- * Once the reference count of the HSA runtime reaches 0, all the resources
- * associated with it (queues, signals, agent information, etc.) are
- * considered invalid and any attempt to reference them in subsequent API calls
- * results in undefined behavior. When the reference count reaches 0, the HSA
- * runtime may release resources associated with it.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- */
-hsa_status_t HSA_API hsa_shut_down();
-
-/** @} **/
-
-/** \defgroup agentinfo System and Agent Information
- *  @{
- */
-
-/**
- * @brief Endianness. A convention used to interpret the bytes making up a data
- * word.
- */
-typedef enum {
-    /**
-     * The least significant byte is stored in the smallest address.
-     */
-    HSA_ENDIANNESS_LITTLE = 0,
-    /**
-     * The most significant byte is stored in the smallest address.
-     */
-    HSA_ENDIANNESS_BIG = 1
-} hsa_endianness_t;
-
-/**
- * @brief Machine model. A machine model determines the size of certain data
- * types in HSA runtime and an agent.
- */
-typedef enum {
-    /**
-     * Small machine model. Addresses use 32 bits.
-     */
-    HSA_MACHINE_MODEL_SMALL = 0,
-    /**
-     * Large machine model. Addresses use 64 bits.
-     */
-    HSA_MACHINE_MODEL_LARGE = 1
-} hsa_machine_model_t;
-
-/**
- * @brief Profile. A profile indicates a particular level of feature
- * support. For example, in the base profile the application must use the HSA
- * runtime allocator to reserve shared virtual memory, while in the full profile
- * any host pointer can be shared across all the agents.
- */
-typedef enum {
-    /**
-     * Base profile.
-     */
-    HSA_PROFILE_BASE = 0,
-    /**
-     * Full profile.
-     */
-    HSA_PROFILE_FULL = 1
-} hsa_profile_t;
-
-/**
- * @brief System attributes.
- */
-typedef enum {
-  /**
-   * Major version of the HSA runtime specification supported by the
-   * implementation. The type of this attribute is uint16_t.
-   */
-  HSA_SYSTEM_INFO_VERSION_MAJOR = 0,
-  /**
-   * Minor version of the HSA runtime specification supported by the
-   * implementation. The type of this attribute is uint16_t.
-   */
-  HSA_SYSTEM_INFO_VERSION_MINOR = 1,
-  /**
-   * Current timestamp. The value of this attribute monotonically increases at a
-   * constant rate. The type of this attribute is uint64_t.
-   */
-  HSA_SYSTEM_INFO_TIMESTAMP = 2,
-  /**
-   * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is
-   * in the range 1-400MHz. The type of this attribute is uint64_t.
-   */
-  HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3,
-  /**
-   * Maximum duration of a signal wait operation. Expressed as a count based on
-   * the timestamp frequency. The type of this attribute is uint64_t.
-   */
-  HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4,
-  /**
-   * Endianness of the system. The type of this attribute is ::hsa_endianness_t.
-   */
-  HSA_SYSTEM_INFO_ENDIANNESS = 5,
-  /**
-   * Machine model supported by the HSA runtime. The type of this attribute is
-   * ::hsa_machine_model_t.
-   */
-  HSA_SYSTEM_INFO_MACHINE_MODEL = 6,
-  /**
-   * Bit-mask indicating which extensions are supported by the
-   * implementation. An extension with an ID of @p i is supported if the bit at
-   * position @p i is set. The type of this attribute is uint8_t[128].
-   */
-  HSA_SYSTEM_INFO_EXTENSIONS = 7,
-  /**
-  * String containing the ROCr build identifier.
-  */
-  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
-} hsa_system_info_t;
-
-/**
- * @brief Get the current value of a system attribute.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * system attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_system_get_info(
-    hsa_system_info_t attribute,
-    void* value);
-
-/**
- * @brief HSA extensions.
- */
-typedef enum {
-  /**
-   * Finalizer extension.
-   */
-  HSA_EXTENSION_FINALIZER = 0,
-  /**
-   * Images extension.
-   */
-  HSA_EXTENSION_IMAGES = 1,
-
-  /**
-   * Performance counter extension.
-   */
-  HSA_EXTENSION_PERFORMANCE_COUNTERS = 2,
-
-  /**
-   * Profiling events extension.
-   */
-  HSA_EXTENSION_PROFILING_EVENTS = 3,
-  /**
-   * Extension count.
-   */
-  HSA_EXTENSION_STD_LAST = 3,
-  /**
-   * First AMD extension number.
-   */
-  HSA_AMD_FIRST_EXTENSION = 0x200,
-  /**
-   * Profiler extension.
-   */
-  HSA_EXTENSION_AMD_PROFILER = 0x200,
-  /**
-   * Loader extension.
-   */
-  HSA_EXTENSION_AMD_LOADER = 0x201,
-  /**
-   * AqlProfile extension.
-   */
-  HSA_EXTENSION_AMD_AQLPROFILE = 0x202,
-  /**
-   * Last AMD extension.
-   */
-  HSA_AMD_LAST_EXTENSION = 0x202
-} hsa_extension_t;
-
-/**
- * @brief Query the name of a given extension.
- *
- * @param[in] extension Extension identifier. If the extension is not supported
- * by the implementation (see ::HSA_SYSTEM_INFO_EXTENSIONS), the behavior
- * is undefined.
- *
- * @param[out] name Pointer to a memory location where the HSA runtime stores
- * the extension name. The extension name is a NUL-terminated string.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p name is NULL.
- */
-hsa_status_t HSA_API hsa_extension_get_name(
-    uint16_t extension,
-    const char **name);
-
-/**
- * @deprecated
- *
- * @brief Query if a given version of an extension is supported by the HSA
- * implementation.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number.
- *
- * @param[in] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p result is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_system_extension_supported(
-    uint16_t extension,
-    uint16_t version_major,
-    uint16_t version_minor,
-    bool* result);
-
-/**
- * @brief Query if a given version of an extension is supported by the HSA
- * implementation. All minor versions from 0 up to the returned @p version_minor
- * must be supported by the implementation.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number.
- *
- * @param[out] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p version_minor is NULL, or @p result is NULL.
- */
-hsa_status_t HSA_API hsa_system_major_extension_supported(
-    uint16_t extension,
-    uint16_t version_major,
-    uint16_t *version_minor,
-    bool* result);
-
-
-/**
- * @deprecated
- *
- * @brief Retrieve the function pointers corresponding to a given version of an
- * extension. Portable applications are expected to invoke the extension API
- * using the returned function pointers
- *
- * @details The application is responsible for verifying that the given version
- * of the extension is supported by the HSA implementation (see
- * ::hsa_system_extension_supported). If the given combination of extension,
- * major version, and minor version is not supported by the implementation, the
- * behavior is undefined.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number for which to retrieve the
- * function pointer table.
- *
- * @param[in] version_minor Minor version number for which to retrieve the
- * function pointer table.
- *
- * @param[out] table Pointer to an application-allocated function pointer table
- * that is populated by the HSA runtime. Must not be NULL. The memory associated
- * with table can be reused or freed after the function returns.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p table is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_system_get_extension_table(
-    uint16_t extension,
-    uint16_t version_major,
-    uint16_t version_minor,
-    void *table);
-
-/**
- * @brief Retrieve the function pointers corresponding to a given major version
- * of an extension. Portable applications are expected to invoke the extension
- * API using the returned function pointers.
- *
- * @details The application is responsible for verifying that the given major
- * version of the extension is supported by the HSA implementation (see
- * ::hsa_system_major_extension_supported). If the given combination of extension
- * and major version is not supported by the implementation, the behavior is
- * undefined. Additionally if the length doesn't allow space for a full minor
- * version, it is implementation defined if only some of the function pointers for
- * that minor version get written.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] version_major Major version number for which to retrieve the
- * function pointer table.
- *
- * @param[in] table_length Size in bytes of the function pointer table to be
- * populated. The implementation will not write more than this many bytes to the
- * table.
- *
- * @param[out] table Pointer to an application-allocated function pointer table
- * that is populated by the HSA runtime. Must not be NULL. The memory associated
- * with table can be reused or freed after the function returns.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p table is NULL.
- */
-hsa_status_t HSA_API hsa_system_get_major_extension_table(
-    uint16_t extension,
-    uint16_t version_major,
-    size_t table_length,
-    void *table);
-
-/**
- * @brief Struct containing an opaque handle to an agent, a device that participates in
- * the HSA memory model. An agent can submit AQL packets for execution, and
- * may also accept AQL packets for execution (agent dispatch packets or kernel
- * dispatch packets launching HSAIL-derived binaries).
- */
-typedef struct hsa_agent_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_agent_t;
-
-/**
- * @brief Agent features.
- */
-typedef enum {
-    /**
-     * The agent supports AQL packets of kernel dispatch type. If this
-     * feature is enabled, the agent is also a kernel agent.
-     */
-    HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1,
-    /**
-     * The agent supports AQL packets of agent dispatch type.
-     */
-    HSA_AGENT_FEATURE_AGENT_DISPATCH = 2
-} hsa_agent_feature_t;
-
-/**
- * @brief Hardware device type.
- */
-typedef enum {
-    /**
-     * CPU device.
-     */
-    HSA_DEVICE_TYPE_CPU = 0,
-    /**
-     * GPU device.
-     */
-    HSA_DEVICE_TYPE_GPU = 1,
-    /**
-     * DSP device.
-     */
-    HSA_DEVICE_TYPE_DSP = 2
-} hsa_device_type_t;
-
-/**
- * @brief Default floating-point rounding mode.
- */
-typedef enum {
-  /**
-   * Use a default floating-point rounding mode specified elsewhere.
-   */
-  HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0,
-  /**
-   * Operations that specify the default floating-point mode are rounded to zero
-   * by default.
-   */
-  HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1,
-  /**
-   * Operations that specify the default floating-point mode are rounded to the
-   * nearest representable number and that ties should be broken by selecting
-   * the value with an even least significant bit.
-   */
-  HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2
-} hsa_default_float_rounding_mode_t;
-
-/**
- * @brief Agent attributes.
- */
-typedef enum {
-  /**
-   * Agent name. The type of this attribute is a NUL-terminated char[64]. The
-   * name must be at most 63 characters long (not including the NUL terminator)
-   * and all array elements not used for the name must be NUL.
-   */
-  HSA_AGENT_INFO_NAME = 0,
-  /**
-   * Name of vendor. The type of this attribute is a NUL-terminated char[64].
-   * The name must be at most 63 characters long (not including the NUL
-   * terminator) and all array elements not used for the name must be NUL.
-   */
-  HSA_AGENT_INFO_VENDOR_NAME = 1,
-  /**
-   * Agent capability. The type of this attribute is ::hsa_agent_feature_t.
-   */
-  HSA_AGENT_INFO_FEATURE = 2,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_MACHINE_MODELS for a given intruction set
-   * architecture supported by the agent instead.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Machine model supported by the agent. The type of this attribute is
-   * ::hsa_machine_model_t.
-   */
-  HSA_AGENT_INFO_MACHINE_MODEL = 3,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_PROFILES for a given intruction set
-   * architecture supported by the agent instead.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Profile supported by the agent. The type of this attribute is
-   * ::hsa_profile_t.
-   */
-  HSA_AGENT_INFO_PROFILE = 4,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES for a given
-   * intruction set architecture supported by the agent instead.  If more than
-   * one ISA is supported by the agent, the returned value corresponds to the
-   * first ISA enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Default floating-point rounding mode. The type of this attribute is
-   * ::hsa_default_float_rounding_mode_t, but the value
-   * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed.
-   */
-  HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES
-   * for a given intruction set architecture supported by the agent instead.  If
-   * more than one ISA is supported by the agent, the returned value corresponds
-   * to the first ISA enumerated by ::hsa_agent_iterate_isas.
-   *
-   * A bit-mask of ::hsa_default_float_rounding_mode_t values, representing the
-   * default floating-point rounding modes supported by the agent in the Base
-   * profile. The type of this attribute is uint32_t. The default floating-point
-   * rounding mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not
-   * be set.
-   */
-  HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_FAST_F16_OPERATION for a given intruction
-   * set architecture supported by the agent instead.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Flag indicating that the f16 HSAIL operation is at least as fast as the
-   * f32 operation in the current agent. The value of this attribute is
-   * undefined if the agent is not a kernel agent. The type of this
-   * attribute is bool.
-   */
-  HSA_AGENT_INFO_FAST_F16_OPERATION = 24,
-  /**
-   * @deprecated Query ::HSA_WAVEFRONT_INFO_SIZE for a given wavefront and
-   * intruction set architecture supported by the agent instead.  If more than
-   * one ISA is supported by the agent, the returned value corresponds to the
-   * first ISA enumerated by ::hsa_agent_iterate_isas and the first wavefront
-   * enumerated by ::hsa_isa_iterate_wavefronts for that ISA.
-   *
-   * Number of work-items in a wavefront. Must be a power of 2 in the range
-   * [1,256]. The value of this attribute is undefined if the agent is not
-   * a kernel agent. The type of this attribute is uint32_t.
-   */
-  HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_DIM for a given intruction
-   * set architecture supported by the agent instead.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Maximum number of work-items of each dimension of a work-group.  Each
-   * maximum must be greater than 0. No maximum can exceed the value of
-   * ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is
-   * undefined if the agent is not a kernel agent. The type of this
-   * attribute is uint16_t[3].
-   */
-  HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE for a given intruction
-   * set architecture supported by the agent instead.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Maximum total number of work-items in a work-group. The value of this
-   * attribute is undefined if the agent is not a kernel agent. The type
-   * of this attribute is uint32_t.
-   */
-  HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_DIM for a given intruction set
-   * architecture supported by the agent instead.
-   *
-   * Maximum number of work-items of each dimension of a grid. Each maximum must
-   * be greater than 0, and must not be smaller than the corresponding value in
-   * ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
-   * ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined
-   * if the agent is not a kernel agent. The type of this attribute is
-   * ::hsa_dim3_t.
-   */
-  HSA_AGENT_INFO_GRID_MAX_DIM = 9,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_SIZE for a given intruction set
-   * architecture supported by the agent instead.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Maximum total number of work-items in a grid. The value of this attribute
-   * is undefined if the agent is not a kernel agent. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_AGENT_INFO_GRID_MAX_SIZE = 10,
-  /**
-   * @deprecated Query ::HSA_ISA_INFO_FBARRIER_MAX_SIZE for a given intruction
-   * set architecture supported by the agent instead.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Maximum number of fbarriers per work-group. Must be at least 32. The value
-   * of this attribute is undefined if the agent is not a kernel agent. The
-   * type of this attribute is uint32_t.
-   */
-  HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11,
-  /**
-   * @deprecated The maximum number of queues is not statically determined.
-   *
-   * Maximum number of queues that can be active (created but not destroyed) at
-   * one time in the agent. The type of this attribute is uint32_t.
-   */
-  HSA_AGENT_INFO_QUEUES_MAX = 12,
-  /**
-   * Minimum number of packets that a queue created in the agent
-   * can hold. Must be a power of 2 greater than 0. Must not exceed
-   * the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13,
-  /**
-   * Maximum number of packets that a queue created in the agent can
-   * hold. Must be a power of 2 greater than 0. The type of this attribute
-   * is uint32_t.
-   */
-  HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14,
-  /**
-   * Type of a queue created in the agent. The type of this attribute is
-   * ::hsa_queue_type32_t.
-   */
-  HSA_AGENT_INFO_QUEUE_TYPE = 15,
-  /**
-   * @deprecated NUMA information is not exposed anywhere else in the API.
-   *
-   * Identifier of the NUMA node associated with the agent. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_AGENT_INFO_NODE = 16,
-  /**
-   * Type of hardware device associated with the agent. The type of this
-   * attribute is ::hsa_device_type_t.
-   */
-  HSA_AGENT_INFO_DEVICE = 17,
-  /**
-   * @deprecated Query ::hsa_agent_iterate_caches to retrieve information about
-   * the caches present in a given agent.
-   *
-   * Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size
-   * of 0 for a particular level indicates that there is no cache information
-   * for that level. The type of this attribute is uint32_t[4].
-   */
-  HSA_AGENT_INFO_CACHE_SIZE = 18,
-  /**
-   * @deprecated An agent may support multiple instruction set
-   * architectures. See ::hsa_agent_iterate_isas.  If more than one ISA is
-   * supported by the agent, the returned value corresponds to the first ISA
-   * enumerated by ::hsa_agent_iterate_isas.
-   *
-   * Instruction set architecture of the agent. The type of this attribute
-   * is ::hsa_isa_t.
-   */
-  HSA_AGENT_INFO_ISA = 19,
-  /**
-   * Bit-mask indicating which extensions are supported by the agent. An
-   * extension with an ID of @p i is supported if the bit at position @p i is
-   * set. The type of this attribute is uint8_t[128].
-   */
-  HSA_AGENT_INFO_EXTENSIONS = 20,
-  /**
-   * Major version of the HSA runtime specification supported by the
-   * agent. The type of this attribute is uint16_t.
-   */
-  HSA_AGENT_INFO_VERSION_MAJOR = 21,
-  /**
-   * Minor version of the HSA runtime specification supported by the
-   * agent. The type of this attribute is uint16_t.
-   */
-  HSA_AGENT_INFO_VERSION_MINOR = 22
-
-} hsa_agent_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given agent.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * agent attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_agent_get_info(
-    hsa_agent_t agent,
-    hsa_agent_info_t attribute,
-    void* value);
-
-/**
- * @brief Iterate over the available agents, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] callback Callback to be invoked once per agent. The HSA
- * runtime passes two arguments to the callback: the agent and the
- * application data.  If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_iterate_agents returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
-*/
-hsa_status_t HSA_API hsa_iterate_agents(
-    hsa_status_t (*callback)(hsa_agent_t agent, void* data),
-    void* data);
-
-/*
-
-// If we do not know the size of an attribute, we need to query it first
-// Note: this API will not be in the spec unless needed
-hsa_status_t HSA_API hsa_agent_get_info_size(
-    hsa_agent_t agent,
-    hsa_agent_info_t attribute,
-    size_t* size);
-
-// Set the value of an agents attribute
-// Note: this API will not be in the spec unless needed
-hsa_status_t HSA_API hsa_agent_set_info(
-    hsa_agent_t agent,
-    hsa_agent_info_t attribute,
-    void* value);
-
-*/
-
-/**
- * @brief Exception policies applied in the presence of hardware exceptions.
- */
-typedef enum {
-    /**
-     * If a hardware exception is detected, a work-item signals an exception.
-     */
-    HSA_EXCEPTION_POLICY_BREAK = 1,
-    /**
-     * If a hardware exception is detected, a hardware status bit is set.
-     */
-    HSA_EXCEPTION_POLICY_DETECT = 2
-} hsa_exception_policy_t;
-
-/**
- * @deprecated Use ::hsa_isa_get_exception_policies for a given intruction set
- * architecture supported by the agent instead. If more than one ISA is
- * supported by the agent, this function uses the first value returned by
- * ::hsa_agent_iterate_isas.
- *
- * @brief Retrieve the exception policy support for a given combination of
- * agent and profile
- *
- * @param[in] agent Agent.
- *
- * @param[in] profile Profile.
- *
- * @param[out] mask Pointer to a memory location where the HSA runtime stores a
- * mask of ::hsa_exception_policy_t values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
- * profile, or @p mask is NULL.
- *
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_get_exception_policies(
-    hsa_agent_t agent,
-    hsa_profile_t profile,
-    uint16_t *mask);
-
-/**
- * @brief Cache handle.
- */
-typedef struct hsa_cache_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_cache_t;
-
-/**
- * @brief Cache attributes.
- */
-typedef enum {
-  /**
-   * The length of the cache name in bytes, not including the NUL terminator.
-   * The type of this attribute is uint32_t.
-   */
-  HSA_CACHE_INFO_NAME_LENGTH = 0,
-  /**
-   * Human-readable description.  The type of this attribute is a NUL-terminated
-   * character array with the length equal to the value of
-   * ::HSA_CACHE_INFO_NAME_LENGTH attribute.
-   */
-  HSA_CACHE_INFO_NAME = 1,
-  /**
-   * Cache level. A L1 cache must return a value of 1, a L2 must return a value
-   * of 2, and so on.  The type of this attribute is uint8_t.
-   */
-  HSA_CACHE_INFO_LEVEL = 2,
-  /**
-   * Cache size, in bytes. A value of 0 indicates that there is no size
-   * information available. The type of this attribute is uint32_t.
-   */
-  HSA_CACHE_INFO_SIZE = 3
-} hsa_cache_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given cache object.
- *
- * @param[in] cache Cache.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CACHE The cache is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * instruction set architecture attribute, or @p value is
- * NULL.
- */
-hsa_status_t HSA_API hsa_cache_get_info(
-    hsa_cache_t cache,
-    hsa_cache_info_t attribute,
-    void* value);
-
-/**
- * @brief Iterate over the memory caches of a given agent, and
- * invoke an application-defined callback on every iteration.
- *
- * @details Caches are visited in ascending order according to the value of the
- * ::HSA_CACHE_INFO_LEVEL attribute.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked once per cache that is present in
- * the agent.  The HSA runtime passes two arguments to the callback: the cache
- * and the application data.  If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * that value is returned.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_agent_iterate_caches(
-    hsa_agent_t agent,
-    hsa_status_t (*callback)(hsa_cache_t cache, void* data),
-    void* data);
-
-/**
- * @deprecated
- *
- * @brief Query if a given version of an extension is supported by an agent
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] agent Agent.
- *
- * @param[in] version_major Major version number.
- *
- * @param[in] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise. The result must be false if
- * ::hsa_system_extension_supported returns false for the same extension
- * version.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p result is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_extension_supported(
-    uint16_t extension,
-    hsa_agent_t agent,
-    uint16_t version_major,
-    uint16_t version_minor,
-    bool* result);
-
-/**
- * @brief Query if a given version of an extension is supported by an agent. All
- * minor versions from 0 up to the returned @p version_minor must be supported.
- *
- * @param[in] extension Extension identifier.
- *
- * @param[in] agent Agent.
- *
- * @param[in] version_major Major version number.
- *
- * @param[out] version_minor Minor version number.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. The result is true if the specified version of the
- * extension is supported, and false otherwise. The result must be false if
- * ::hsa_system_extension_supported returns false for the same extension
- * version.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
- * extension, or @p version_minor is NULL, or @p result is NULL.
- */
-hsa_status_t HSA_API hsa_agent_major_extension_supported(
-    uint16_t extension,
-    hsa_agent_t agent,
-    uint16_t version_major,
-    uint16_t *version_minor,
-    bool* result);
-
-
-/** @} */
-
-
-/** \defgroup signals Signals
- *  @{
- */
-
-/**
- * @brief Signal handle.
- */
-typedef struct hsa_signal_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal. The value 0 is reserved.
-   */
-  uint64_t handle;
-} hsa_signal_t;
-
-/**
- * @brief Signal value. The value occupies 32 bits in small machine mode, and 64
- * bits in large machine mode.
- */
-#ifdef HSA_LARGE_MODEL
-  typedef int64_t hsa_signal_value_t;
-#else
-  typedef int32_t hsa_signal_value_t;
-#endif
-
-/**
- * @brief Create a signal.
- *
- * @param[in] initial_value Initial value of the signal.
- *
- * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
- * any agent might wait on the signal.
- *
- * @param[in] consumers List of agents that might consume (wait on) the
- * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
- * HSA runtime might use the list to optimize the handling of the signal
- * object. If an agent not listed in @p consumers waits on the returned
- * signal, the behavior is undefined. The memory associated with @p consumers
- * can be reused or freed after the function returns.
- *
- * @param[out] signal Pointer to a memory location where the HSA runtime will
- * store the newly created signal handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
- * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
- * contains duplicates.
- */
-hsa_status_t HSA_API hsa_signal_create(
-    hsa_signal_value_t initial_value,
-    uint32_t num_consumers,
-    const hsa_agent_t *consumers,
-    hsa_signal_t *signal);
-
-/**
- * @brief Destroy a signal previous created by ::hsa_signal_create.
- *
- * @param[in] signal Signal.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The handle in @p signal is 0.
- */
-hsa_status_t HSA_API hsa_signal_destroy(
-    hsa_signal_t signal);
-
-/**
- * @brief Atomically read the current value of a signal.
- *
- * @param[in] signal Signal.
- *
- * @return Value of the signal.
-*/
-hsa_signal_value_t HSA_API hsa_signal_load_scacquire(
-    hsa_signal_t signal);
-
-/**
- * @copydoc hsa_signal_load_scacquire
- */
-hsa_signal_value_t HSA_API hsa_signal_load_relaxed(
-    hsa_signal_t signal);
-
-/**
- * @deprecated Renamed as ::hsa_signal_load_scacquire.
- *
- * @copydoc hsa_signal_load_scacquire
-*/
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_load_acquire(
-    hsa_signal_t signal);
-
-/**
- * @brief Atomically set the value of a signal.
- *
- * @details If the value of the signal is changed, all the agents waiting
- * on @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal.
- *
- * @param[in] value New signal value.
- */
-void HSA_API hsa_signal_store_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_store_relaxed
- */
-void HSA_API hsa_signal_store_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_store_screlease.
- *
- * @copydoc hsa_signal_store_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_store_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically set the value of a signal without necessarily notifying the
- * the agents waiting on it.
- *
- * @details The agents waiting on @p signal may not wake up even when the new
- * value satisfies their wait condition. If the application wants to update the
- * signal and there is no need to notify any agent, invoking this function can
- * be more efficient than calling the non-silent counterpart.
- *
- * @param[in] signal Signal.
- *
- * @param[in] value New signal value.
- */
-void HSA_API hsa_signal_silent_store_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_silent_store_relaxed
- */
-void HSA_API hsa_signal_silent_store_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically set the value of a signal and return its previous value.
- *
- * @details If the value of the signal is changed, all the agents waiting
- * on @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value New value.
- *
- * @return Value of the signal prior to the exchange.
- *
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_scacq_screl(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_exchange_scacq_screl.
- *
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acq_rel(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_exchange_scacquire.
- *
- * @copydoc hsa_signal_exchange_scacquire
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-/**
- * @copydoc hsa_signal_exchange_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_exchange_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_exchange_screlease.
- *
- * @copydoc hsa_signal_exchange_screlease
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically set the value of a signal if the observed value is equal to
- * the expected value. The observed value is returned regardless of whether the
- * replacement was done.
- *
- * @details If the value of the signal is changed, all the agents waiting
- * on @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue
- * doorbell signal, the behavior is undefined.
- *
- * @param[in] expected Value to compare with.
- *
- * @param[in] value New value.
- *
- * @return Observed value of the signal.
- *
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_scacq_screl(
-    hsa_signal_t signal,
-    hsa_signal_value_t expected,
-    hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_cas_scacq_screl.
- *
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acq_rel(
-    hsa_signal_t signal,
-    hsa_signal_value_t expected,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t expected,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_cas_scacquire.
- *
- * @copydoc hsa_signal_cas_scacquire
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t expected,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t expected,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_cas_scacq_screl
- */
-hsa_signal_value_t HSA_API hsa_signal_cas_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t expected,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_cas_screlease.
- *
- * @copydoc hsa_signal_cas_screlease
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t expected,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically increment the value of a signal by a given amount.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to add to the value of the signal.
- *
- */
-void HSA_API hsa_signal_add_scacq_screl(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_add_scacq_screl.
- *
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_add_acq_rel(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API hsa_signal_add_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_add_scacquire.
- *
- * @copydoc hsa_signal_add_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_add_acquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API hsa_signal_add_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_add_scacq_screl
- */
-void HSA_API hsa_signal_add_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_add_screlease.
- *
- * @copydoc hsa_signal_add_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_add_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically decrement the value of a signal by a given amount.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to subtract from the value of the signal.
- *
- */
-void HSA_API hsa_signal_subtract_scacq_screl(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_subtract_scacq_screl.
- *
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_subtract_acq_rel(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API hsa_signal_subtract_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_subtract_scacquire.
- *
- * @copydoc hsa_signal_subtract_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_subtract_acquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API hsa_signal_subtract_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_subtract_scacq_screl
- */
-void HSA_API hsa_signal_subtract_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_subtract_screlease.
- *
- * @copydoc hsa_signal_subtract_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_subtract_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically perform a bitwise AND operation between the value of a
- * signal and a given value.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to AND with the value of the signal.
- *
- */
-void HSA_API hsa_signal_and_scacq_screl(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_and_scacq_screl.
- *
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_and_acq_rel(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API hsa_signal_and_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_and_scacquire.
- *
- * @copydoc hsa_signal_and_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_and_acquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API hsa_signal_and_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_and_scacq_screl
- */
-void HSA_API hsa_signal_and_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_and_screlease.
- *
- * @copydoc hsa_signal_and_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_and_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically perform a bitwise OR operation between the value of a
- * signal and a given value.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to OR with the value of the signal.
- */
-void HSA_API hsa_signal_or_scacq_screl(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_or_scacq_screl.
- *
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_or_acq_rel(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API hsa_signal_or_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_or_scacquire.
- *
- * @copydoc hsa_signal_or_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_or_acquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API hsa_signal_or_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_or_scacq_screl
- */
-void HSA_API hsa_signal_or_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_or_screlease.
- *
- * @copydoc hsa_signal_or_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_or_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Atomically perform a bitwise XOR operation between the value of a
- * signal and a given value.
- *
- * @details If the value of the signal is changed, all the agents waiting on
- * @p signal for which @p value satisfies their wait condition are awakened.
- *
- * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
- * behavior is undefined.
- *
- * @param[in] value Value to XOR with the value of the signal.
- *
- */
-void HSA_API hsa_signal_xor_scacq_screl(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-
-/**
- * @deprecated Renamed as ::hsa_signal_xor_scacq_screl.
- *
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API HSA_DEPRECATED hsa_signal_xor_acq_rel(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API hsa_signal_xor_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_xor_scacquire.
- *
- * @copydoc hsa_signal_xor_scacquire
- */
-void HSA_API HSA_DEPRECATED hsa_signal_xor_acquire(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API hsa_signal_xor_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @copydoc hsa_signal_xor_scacq_screl
- */
-void HSA_API hsa_signal_xor_screlease(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @deprecated Renamed as ::hsa_signal_xor_screlease.
- *
- * @copydoc hsa_signal_xor_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_signal_xor_release(
-    hsa_signal_t signal,
-    hsa_signal_value_t value);
-
-/**
- * @brief Wait condition operator.
- */
-typedef enum {
-    /**
-     * The two operands are equal.
-     */
-    HSA_SIGNAL_CONDITION_EQ = 0,
-    /**
-     * The two operands are not equal.
-     */
-    HSA_SIGNAL_CONDITION_NE = 1,
-    /**
-     * The first operand is less than the second operand.
-     */
-    HSA_SIGNAL_CONDITION_LT = 2,
-    /**
-     * The first operand is greater than or equal to the second operand.
-     */
-    HSA_SIGNAL_CONDITION_GTE = 3
-} hsa_signal_condition_t;
-
-/**
- * @brief State of the application thread during a signal wait.
- */
-typedef enum {
-    /**
-     * The application thread may be rescheduled while waiting on the signal.
-     */
-    HSA_WAIT_STATE_BLOCKED = 0,
-    /**
-     * The application thread stays active while waiting on a signal.
-     */
-    HSA_WAIT_STATE_ACTIVE = 1
-} hsa_wait_state_t;
-
-
-/**
- * @brief Wait until a signal value satisfies a specified condition, or a
- * certain amount of time has elapsed.
- *
- * @details A wait operation can spuriously resume at any time sooner than the
- * timeout (for example, due to system or other external factors) even when the
- * condition has not been met.
- *
- * The function is guaranteed to return if the signal value satisfies the
- * condition at some point in time during the wait, but the value returned to
- * the application might not satisfy the condition. The application must ensure
- * that signals are used in such way that wait wakeup conditions are not
- * invalidated before dependent threads have woken up.
- *
- * When the wait operation internally loads the value of the passed signal, it
- * uses the memory order indicated in the function name.
- *
- * @param[in] signal Signal.
- *
- * @param[in] condition Condition used to compare the signal value with @p
- * compare_value.
- *
- * @param[in] compare_value Value to compare with.
- *
- * @param[in] timeout_hint Maximum duration of the wait.  Specified in the same
- * unit as the system timestamp. The operation might block for a shorter or
- * longer time even if the condition is not met. A value of UINT64_MAX indicates
- * no maximum.
- *
- * @param[in] wait_state_hint Hint used by the application to indicate the
- * preferred waiting state. The actual waiting state is ultimately decided by
- * HSA runtime and may not match the provided hint. A value of
- * ::HSA_WAIT_STATE_ACTIVE may improve the latency of response to a signal
- * update by avoiding rescheduling overhead.
- *
- * @return Observed value of the signal, which might not satisfy the specified
- * condition.
- *
-*/
-hsa_signal_value_t HSA_API hsa_signal_wait_scacquire(
-    hsa_signal_t signal,
-    hsa_signal_condition_t condition,
-    hsa_signal_value_t compare_value,
-    uint64_t timeout_hint,
-    hsa_wait_state_t wait_state_hint);
-
-/**
- * @copydoc hsa_signal_wait_scacquire
- */
-hsa_signal_value_t HSA_API hsa_signal_wait_relaxed(
-    hsa_signal_t signal,
-    hsa_signal_condition_t condition,
-    hsa_signal_value_t compare_value,
-    uint64_t timeout_hint,
-    hsa_wait_state_t wait_state_hint);
-
-/**
- * @deprecated Renamed as ::hsa_signal_wait_scacquire.
- *
- * @copydoc hsa_signal_wait_scacquire
- */
-hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_wait_acquire(
-    hsa_signal_t signal,
-    hsa_signal_condition_t condition,
-    hsa_signal_value_t compare_value,
-    uint64_t timeout_hint,
-    hsa_wait_state_t wait_state_hint);
-
-/**
- * @brief Group of signals.
- */
-typedef struct hsa_signal_group_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_signal_group_t;
-
-/**
- * @brief Create a signal group.
- *
- * @param[in] num_signals Number of elements in @p signals. Must not be 0.
- *
- * @param[in] signals List of signals in the group. The list must not contain
- * any repeated elements. Must not be NULL.
- *
- * @param[in] num_consumers Number of elements in @p consumers. Must not be 0.
- *
- * @param[in] consumers List of agents that might consume (wait on) the signal
- * group. The list must not contain repeated elements, and must be a subset of
- * the set of agents that are allowed to wait on all the signals in the
- * group. If an agent not listed in @p consumers waits on the returned group,
- * the behavior is undefined. The memory associated with @p consumers can be
- * reused or freed after the function returns. Must not be NULL.
- *
- * @param[out] signal_group Pointer to newly created signal group. Must not be
- * NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_signals is 0, @p signals
- * is NULL, @p num_consumers is 0, @p consumers is NULL, or @p signal_group is
- * NULL.
- */
-hsa_status_t HSA_API hsa_signal_group_create(
-    uint32_t num_signals,
-    const hsa_signal_t *signals,
-    uint32_t num_consumers,
-    const hsa_agent_t *consumers,
-    hsa_signal_group_t *signal_group);
-
-/**
- * @brief Destroy a signal group previous created by ::hsa_signal_group_create.
- *
- * @param[in] signal_group Signal group.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid.
- */
-hsa_status_t HSA_API hsa_signal_group_destroy(
-    hsa_signal_group_t signal_group);
-
-/**
- * @brief Wait until the value of at least one of the signals in a signal group
- * satisfies its associated condition.
- *
- * @details The function is guaranteed to return if the value of at least one of
- * the signals in the group satisfies its associated condition at some point in
- * time during the wait, but the signal value returned to the application may no
- * longer satisfy the condition. The application must ensure that signals in the
- * group are used in such way that wait wakeup conditions are not invalidated
- * before dependent threads have woken up.
- *
- * When this operation internally loads the value of the passed signal, it uses
- * the memory order indicated in the function name.
- *
- * @param[in] signal_group Signal group.
- *
- * @param[in] conditions List of conditions. Each condition, and the value at
- * the same index in @p compare_values, is used to compare the value of the
- * signal at that index in @p signal_group (the signal passed by the application
- * to ::hsa_signal_group_create at that particular index). The size of @p
- * conditions must not be smaller than the number of signals in @p signal_group;
- * any extra elements are ignored. Must not be NULL.
- *
- * @param[in] compare_values List of comparison values.  The size of @p
- * compare_values must not be smaller than the number of signals in @p
- * signal_group; any extra elements are ignored. Must not be NULL.
- *
- * @param[in] wait_state_hint Hint used by the application to indicate the
- * preferred waiting state. The actual waiting state is decided by the HSA runtime
- * and may not match the provided hint. A value of ::HSA_WAIT_STATE_ACTIVE may
- * improve the latency of response to a signal update by avoiding rescheduling
- * overhead.
- *
- * @param[out] signal Signal in the group that satisfied the associated
- * condition. If several signals satisfied their condition, the function can
- * return any of those signals. Must not be NULL.
- *
- * @param[out] value Observed value for @p signal, which might no longer satisfy
- * the specified condition. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p conditions is NULL, @p
- * compare_values is NULL, @p signal is NULL, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_signal_group_wait_any_scacquire(
-    hsa_signal_group_t signal_group,
-    const hsa_signal_condition_t *conditions,
-    const hsa_signal_value_t *compare_values,
-    hsa_wait_state_t wait_state_hint,
-    hsa_signal_t *signal,
-    hsa_signal_value_t *value);
-
-/**
- * @copydoc hsa_signal_group_wait_any_scacquire
- */
-hsa_status_t HSA_API hsa_signal_group_wait_any_relaxed(
-    hsa_signal_group_t signal_group,
-    const hsa_signal_condition_t *conditions,
-    const hsa_signal_value_t *compare_values,
-    hsa_wait_state_t wait_state_hint,
-    hsa_signal_t *signal,
-    hsa_signal_value_t *value);
-
-/** @} */
-
-/** \defgroup memory Memory
- *  @{
- */
-
-/**
- * @brief A memory region represents a block of virtual memory with certain
- * properties. For example, the HSA runtime represents fine-grained memory in
- * the global segment using a region. A region might be associated with more
- * than one agent.
- */
-typedef struct hsa_region_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_region_t;
-
-/** @} */
-
-
-/** \defgroup queue Queues
- *  @{
- */
-
-/**
- * @brief Queue type. Intended to be used for dynamic queue protocol
- * determination.
- */
-typedef enum {
-  /**
-   * Queue supports multiple producers. Use of multiproducer queue mechanics is
-   * required.
-   */
-  HSA_QUEUE_TYPE_MULTI = 0,
-  /**
-   * Queue only supports a single producer. In some scenarios, the application
-   * may want to limit the submission of AQL packets to a single agent. Queues
-   * that support a single producer may be more efficient than queues supporting
-   * multiple producers. Use of multiproducer queue mechanics is not supported.
-   */
-  HSA_QUEUE_TYPE_SINGLE = 1,
-  /**
-   * Queue supports multiple producers and cooperative dispatches. Cooperative
-   * dispatches are able to use GWS synchronization. Queues of this type may be
-   * limited in number. The runtime may return the same queue to serve multiple
-   * ::hsa_queue_create calls when this type is given. Callers must inspect the
-   * returned queue to discover queue size. Queues of this type are reference
-   * counted and require a matching number of ::hsa_queue_destroy calls to
-   * release. Use of multiproducer queue mechanics is required. See
-   * ::HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES to query agent support for this
-   * type.
-   */
-  HSA_QUEUE_TYPE_COOPERATIVE = 2
-} hsa_queue_type_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_queue_type_t constants.
- */
-typedef uint32_t hsa_queue_type32_t;
-
-/**
- * @brief Queue features.
- */
-typedef enum {
-  /**
-   * Queue supports kernel dispatch packets.
-   */
-  HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1,
-
-  /**
-   * Queue supports agent dispatch packets.
-   */
-  HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2
-} hsa_queue_feature_t;
-
-/**
- * @brief User mode queue.
- *
- * @details The queue structure is read-only and allocated by the HSA runtime,
- * but agents can directly modify the contents of the buffer pointed by @a
- * base_address, or use HSA runtime APIs to access the doorbell signal.
- *
- */
-typedef struct hsa_queue_s {
-  /**
-   * Queue type.
-   */
-  hsa_queue_type32_t type;
-
-  /**
-   * Queue features mask. This is a bit-field of ::hsa_queue_feature_t
-   * values. Applications should ignore any unknown set bits.
-   */
-  uint32_t features;
-
-#ifdef HSA_LARGE_MODEL
-  void* base_address;
-#elif defined HSA_LITTLE_ENDIAN
-  /**
-   * Starting address of the HSA runtime-allocated buffer used to store the AQL
-   * packets. Must be aligned to the size of an AQL packet.
-   */
-  void* base_address;
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved0;
-#else
-  uint32_t reserved0;
-  void* base_address;
-#endif
-
-  /**
-   * Signal object used by the application to indicate the ID of a packet that
-   * is ready to be processed. The HSA runtime manages the doorbell signal. If
-   * the application tries to replace or destroy this signal, the behavior is
-   * undefined.
-   *
-   * If @a type is ::HSA_QUEUE_TYPE_SINGLE, the doorbell signal value must be
-   * updated in a monotonically increasing fashion. If @a type is
-   * ::HSA_QUEUE_TYPE_MULTI, the doorbell signal value can be updated with any
-   * value.
-   */
-  hsa_signal_t doorbell_signal;
-
-  /**
-   * Maximum number of packets the queue can hold. Must be a power of 2.
-   */
-  uint32_t size;
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved1;
-  /**
-   * Queue identifier, which is unique over the lifetime of the application.
-   */
-  uint64_t id;
-
-} hsa_queue_t;
-
-/**
- * @brief Create a user mode queue.
- *
- * @details The HSA runtime creates the queue structure, the underlying packet
- * buffer, the completion signal, and the write and read indexes. The initial
- * value of the write and read indexes is 0. The type of every packet in the
- * buffer is initialized to ::HSA_PACKET_TYPE_INVALID.
- *
- * The application should only rely on the error code returned to determine if
- * the queue is valid.
- *
- * @param[in] agent Agent where to create the queue.
- *
- * @param[in] size Number of packets the queue is expected to
- * hold. Must be a power of 2 between 1 and the value of
- * ::HSA_AGENT_INFO_QUEUE_MAX_SIZE in @p agent. The size of the newly
- * created queue is the maximum of @p size and the value of
- * ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent.
- *
- * @param[in] type Type of the queue, a bitwise OR of hsa_queue_type_t values.
- * If the value of ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE,
- * then @p type must also be ::HSA_QUEUE_TYPE_SINGLE.
- *
- * @param[in] callback Callback invoked by the HSA runtime for every
- * asynchronous event related to the newly created queue. May be NULL. The HSA
- * runtime passes three arguments to the callback: a code identifying the event
- * that triggered the invocation, a pointer to the queue where the event
- * originated, and the application data.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @param[in] private_segment_size Hint indicating the maximum
- * expected private segment usage per work-item, in bytes. There may
- * be performance degradation if the application places a kernel
- * dispatch packet in the queue and the corresponding private segment
- * usage exceeds @p private_segment_size. If the application does not
- * want to specify any particular value for this argument, @p
- * private_segment_size must be UINT32_MAX. If the queue does not
- * support kernel dispatch packets, this argument is ignored.
- *
- * @param[in] group_segment_size Hint indicating the maximum expected
- * group segment usage per work-group, in bytes. There may be
- * performance degradation if the application places a kernel dispatch
- * packet in the queue and the corresponding group segment usage
- * exceeds @p group_segment_size. If the application does not want to
- * specify any particular value for this argument, @p
- * group_segment_size must be UINT32_MAX. If the queue does not
- * support kernel dispatch packets, this argument is ignored.
- *
- * @param[out] queue Memory location where the HSA runtime stores a pointer to
- * the newly created queue.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE_CREATION @p agent does not
- * support queues of the given type.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two,
- * @p size is 0, @p type is an invalid queue type, or @p queue is NULL.
- *
- */
-hsa_status_t HSA_API hsa_queue_create(
-    hsa_agent_t agent,
-    uint32_t size,
-    hsa_queue_type32_t type,
-    void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
-    void *data,
-    uint32_t private_segment_size,
-    uint32_t group_segment_size,
-    hsa_queue_t **queue);
-
-/**
- * @brief Create a queue for which the application or a kernel is responsible
- * for processing the AQL packets.
- *
- * @details The application can use this function to create queues where AQL
- * packets are not parsed by the packet processor associated with an agent,
- * but rather by a unit of execution running on that agent (for example, a
- * thread in the host application).
- *
- * The application is responsible for ensuring that all the producers and
- * consumers of the resulting queue can access the provided doorbell signal
- * and memory region. The application is also responsible for ensuring that the
- * unit of execution processing the queue packets supports the indicated
- * features (AQL packet types).
- *
- * When the queue is created, the HSA runtime allocates the packet buffer using
- * @p region, and the write and read indexes. The initial value of the write and
- * read indexes is 0, and the type of every packet in the buffer is initialized
- * to ::HSA_PACKET_TYPE_INVALID. The value of the @e size, @e type, @e features,
- * and @e doorbell_signal fields in the returned queue match the values passed
- * by the application.
- *
- * @param[in] region Memory region that the HSA runtime should use to allocate
- * the AQL packet buffer and any other queue metadata.
- *
- * @param[in] size Number of packets the queue is expected to hold. Must be a
- * power of 2 greater than 0.
- *
- * @param[in] type Queue type.
- *
- * @param[in] features Supported queue features. This is a bit-field of
- * ::hsa_queue_feature_t values.
- *
- * @param[in] doorbell_signal Doorbell signal that the HSA runtime must
- * associate with the returned queue. The signal handle must not be 0.
- *
- * @param[out] queue Memory location where the HSA runtime stores a pointer to
- * the newly created queue. The application should not rely on the value
- * returned for this argument but only in the status code to determine if the
- * queue is valid. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, @p
- * size is 0, @p type is an invalid queue type, the doorbell signal handle is
- * 0, or @p queue is NULL.
- *
- */
-hsa_status_t HSA_API hsa_soft_queue_create(
-    hsa_region_t region,
-    uint32_t size,
-    hsa_queue_type32_t type,
-    uint32_t features,
-    hsa_signal_t doorbell_signal,
-    hsa_queue_t **queue);
-
-/**
- * @brief Destroy a user mode queue.
- *
- * @details When a queue is destroyed, the state of the AQL packets that have
- * not been yet fully processed (their completion phase has not finished)
- * becomes undefined. It is the responsibility of the application to ensure that
- * all pending queue operations are finished if their results are required.
- *
- * The resources allocated by the HSA runtime during queue creation (queue
- * structure, ring buffer, doorbell signal) are released.  The queue should not
- * be accessed after being destroyed.
- *
- * @param[in] queue Pointer to a queue created using ::hsa_queue_create.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
- */
-hsa_status_t HSA_API hsa_queue_destroy(
-    hsa_queue_t *queue);
-
-/**
- * @brief Inactivate a queue.
- *
- * @details Inactivating the queue aborts any pending executions and prevent any
- * new packets from being processed. Any more packets written to the queue once
- * it is inactivated will be ignored by the packet processor.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
- */
-hsa_status_t HSA_API hsa_queue_inactivate(
-    hsa_queue_t *queue);
-
-/**
- * @deprecated Renamed as ::hsa_queue_load_read_index_scacquire.
- *
- * @copydoc hsa_queue_load_read_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_read_index_acquire(
-    const hsa_queue_t *queue);
-
-/**
- * @brief Atomically load the read index of a queue.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @return Read index of the queue pointed by @p queue.
- */
-uint64_t HSA_API hsa_queue_load_read_index_scacquire(
-    const hsa_queue_t *queue);
-
-/**
- * @copydoc hsa_queue_load_read_index_scacquire
- */
-uint64_t HSA_API hsa_queue_load_read_index_relaxed(
-    const hsa_queue_t *queue);
-
-/**
- * @deprecated Renamed as ::hsa_queue_load_write_index_scacquire.
- *
- * @copydoc hsa_queue_load_write_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_write_index_acquire(
-    const hsa_queue_t *queue);
-
-/**
- * @brief Atomically load the write index of a queue.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @return Write index of the queue pointed by @p queue.
- */
-uint64_t HSA_API hsa_queue_load_write_index_scacquire(
-    const hsa_queue_t *queue);
-
-/**
- * @copydoc hsa_queue_load_write_index_scacquire
- */
-uint64_t HSA_API hsa_queue_load_write_index_relaxed(
-    const hsa_queue_t *queue);
-
-/**
- * @brief Atomically set the write index of a queue.
- *
- * @details It is recommended that the application uses this function to update
- * the write index when there is a single agent submitting work to the queue
- * (the queue type is ::HSA_QUEUE_TYPE_SINGLE).
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] value Value to assign to the write index.
- *
- */
-void HSA_API hsa_queue_store_write_index_relaxed(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_store_write_index_screlease.
- *
- * @copydoc hsa_queue_store_write_index_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_queue_store_write_index_release(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_store_write_index_relaxed
- */
-void HSA_API hsa_queue_store_write_index_screlease(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_cas_write_index_scacq_screl.
- *
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acq_rel(
-    const hsa_queue_t *queue,
-    uint64_t expected,
-    uint64_t value);
-
-/**
- * @brief Atomically set the write index of a queue if the observed value is
- * equal to the expected value. The application can inspect the returned value
- * to determine if the replacement was done.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] expected Expected value.
- *
- * @param[in] value Value to assign to the write index if @p expected matches
- * the observed write index. Must be greater than @p expected.
- *
- * @return Previous value of the write index.
- */
-uint64_t HSA_API hsa_queue_cas_write_index_scacq_screl(
-    const hsa_queue_t *queue,
-    uint64_t expected,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_cas_write_index_scacquire.
- *
- * @copydoc hsa_queue_cas_write_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acquire(
-    const hsa_queue_t *queue,
-    uint64_t expected,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_cas_write_index_scacquire(
-    const hsa_queue_t *queue,
-    uint64_t expected,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_cas_write_index_relaxed(
-    const hsa_queue_t *queue,
-    uint64_t expected,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_cas_write_index_screlease.
- *
- * @copydoc hsa_queue_cas_write_index_screlease
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_release(
-    const hsa_queue_t *queue,
-    uint64_t expected,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_cas_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_cas_write_index_screlease(
-    const hsa_queue_t *queue,
-    uint64_t expected,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_add_write_index_scacq_screl.
- *
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acq_rel(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @brief Atomically increment the write index of a queue by an offset.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] value Value to add to the write index.
- *
- * @return Previous value of the write index.
- */
-uint64_t HSA_API hsa_queue_add_write_index_scacq_screl(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_add_write_index_scacquire.
- *
- * @copydoc hsa_queue_add_write_index_scacquire
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acquire(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_add_write_index_scacquire(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_add_write_index_relaxed(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_add_write_index_screlease.
- *
- * @copydoc hsa_queue_add_write_index_screlease
- */
-uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_release(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_add_write_index_scacq_screl
- */
-uint64_t HSA_API hsa_queue_add_write_index_screlease(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @brief Atomically set the read index of a queue.
- *
- * @details Modifications of the read index are not allowed and result in
- * undefined behavior if the queue is associated with an agent for which
- * only the corresponding packet processor is permitted to update the read
- * index.
- *
- * @param[in] queue Pointer to a queue.
- *
- * @param[in] value Value to assign to the read index.
- *
- */
-void HSA_API hsa_queue_store_read_index_relaxed(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @deprecated Renamed as ::hsa_queue_store_read_index_screlease.
- *
- * @copydoc hsa_queue_store_read_index_screlease
- */
-void HSA_API HSA_DEPRECATED hsa_queue_store_read_index_release(
-    const hsa_queue_t *queue,
-    uint64_t value);
-
-/**
- * @copydoc hsa_queue_store_read_index_relaxed
- */
-void HSA_API hsa_queue_store_read_index_screlease(
-   const hsa_queue_t *queue,
-   uint64_t value);
-/** @} */
-
-
-/** \defgroup aql Architected Queuing Language
- *  @{
- */
-
-/**
- * @brief Packet type.
- */
-typedef enum {
-  /**
-   * Vendor-specific packet.
-   */
-  HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0,
-  /**
-   * The packet has been processed in the past, but has not been reassigned to
-   * the packet processor. A packet processor must not process a packet of this
-   * type. All queues support this packet type.
-   */
-  HSA_PACKET_TYPE_INVALID = 1,
-  /**
-   * Packet used by agents for dispatching jobs to kernel agents. Not all
-   * queues support packets of this type (see ::hsa_queue_feature_t).
-   */
-  HSA_PACKET_TYPE_KERNEL_DISPATCH = 2,
-  /**
-   * Packet used by agents to delay processing of subsequent packets, and to
-   * express complex dependencies between multiple packets. All queues support
-   * this packet type.
-   */
-  HSA_PACKET_TYPE_BARRIER_AND = 3,
-  /**
-   * Packet used by agents for dispatching jobs to agents.  Not all
-   * queues support packets of this type (see ::hsa_queue_feature_t).
-   */
-  HSA_PACKET_TYPE_AGENT_DISPATCH = 4,
-  /**
-   * Packet used by agents to delay processing of subsequent packets, and to
-   * express complex dependencies between multiple packets. All queues support
-   * this packet type.
-   */
-  HSA_PACKET_TYPE_BARRIER_OR = 5
-} hsa_packet_type_t;
-
-/**
- * @brief Scope of the memory fence operation associated with a packet.
- */
-typedef enum {
-  /**
-   * No scope (no fence is applied). The packet relies on external fences to
-   * ensure visibility of memory updates.
-   */
-  HSA_FENCE_SCOPE_NONE = 0,
-  /**
-   * The fence is applied with agent scope for the global segment.
-   */
-  HSA_FENCE_SCOPE_AGENT = 1,
-  /**
-   * The fence is applied across both agent and system scope for the global
-   * segment.
-   */
-  HSA_FENCE_SCOPE_SYSTEM = 2
-} hsa_fence_scope_t;
-
-/**
- * @brief Sub-fields of the @a header field that is present in any AQL
- * packet. The offset (with respect to the address of @a header) of a sub-field
- * is identical to its enumeration constant. The width of each sub-field is
- * determined by the corresponding value in ::hsa_packet_header_width_t. The
- * offset and the width are expressed in bits.
- */
- typedef enum {
-  /**
-   * Packet type. The value of this sub-field must be one of
-   * ::hsa_packet_type_t. If the type is ::HSA_PACKET_TYPE_VENDOR_SPECIFIC, the
-   * packet layout is vendor-specific.
-   */
-   HSA_PACKET_HEADER_TYPE = 0,
-  /**
-   * Barrier bit. If the barrier bit is set, the processing of the current
-   * packet only launches when all preceding packets (within the same queue) are
-   * complete.
-   */
-   HSA_PACKET_HEADER_BARRIER = 8,
-  /**
-   * Acquire fence scope. The value of this sub-field determines the scope and
-   * type of the memory fence operation applied before the packet enters the
-   * active phase. An acquire fence ensures that any subsequent global segment
-   * or image loads by any unit of execution that belongs to a dispatch that has
-   * not yet entered the active phase on any queue of the same kernel agent,
-   * sees any data previously released at the scopes specified by the acquire
-   * fence. The value of this sub-field must be one of ::hsa_fence_scope_t.
-   */
-   HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE = 9,
-   /**
-    * @deprecated Renamed as ::HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE.
-    */
-   HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9,
-  /**
-   * Release fence scope, The value of this sub-field determines the scope and
-   * type of the memory fence operation applied after kernel completion but
-   * before the packet is completed. A release fence makes any global segment or
-   * image data that was stored by any unit of execution that belonged to a
-   * dispatch that has completed the active phase on any queue of the same
-   * kernel agent visible in all the scopes specified by the release fence. The
-   * value of this sub-field must be one of ::hsa_fence_scope_t.
-   */
-   HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE = 11,
-   /**
-    * @deprecated Renamed as ::HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE.
-    */
-   HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11
- } hsa_packet_header_t;
-
-/**
- * @brief Width (in bits) of the sub-fields in ::hsa_packet_header_t.
- */
- typedef enum {
-   HSA_PACKET_HEADER_WIDTH_TYPE = 8,
-   HSA_PACKET_HEADER_WIDTH_BARRIER = 1,
-   HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE = 2,
-   /**
-    * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE.
-    */
-   HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = 2,
-   HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE = 2,
-   /**
-    * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE.
-    */
-   HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = 2
- } hsa_packet_header_width_t;
-
-/**
- * @brief Sub-fields of the kernel dispatch packet @a setup field. The offset
- * (with respect to the address of @a setup) of a sub-field is identical to its
- * enumeration constant. The width of each sub-field is determined by the
- * corresponding value in ::hsa_kernel_dispatch_packet_setup_width_t. The
- * offset and the width are expressed in bits.
- */
- typedef enum {
-  /**
-   * Number of dimensions of the grid. Valid values are 1, 2, or 3.
-   *
-   */
-   HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0
- } hsa_kernel_dispatch_packet_setup_t;
-
-/**
- * @brief Width (in bits) of the sub-fields in
- * ::hsa_kernel_dispatch_packet_setup_t.
- */
- typedef enum {
-   HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2
- } hsa_kernel_dispatch_packet_setup_width_t;
-
-/**
- * @brief AQL kernel dispatch packet
- */
-typedef struct hsa_kernel_dispatch_packet_s {
-  /**
-   * Packet header. Used to configure multiple packet parameters such as the
-   * packet type. The parameters are described by ::hsa_packet_header_t.
-   */
-  uint16_t header;
-
-  /**
-   * Dispatch setup parameters. Used to configure kernel dispatch parameters
-   * such as the number of dimensions in the grid. The parameters are described
-   * by ::hsa_kernel_dispatch_packet_setup_t.
-   */
-  uint16_t setup;
-
-  /**
-   * X dimension of work-group, in work-items. Must be greater than 0.
-   */
-  uint16_t workgroup_size_x;
-
-  /**
-   * Y dimension of work-group, in work-items. Must be greater than
-   * 0. If the grid has 1 dimension, the only valid value is 1.
-   */
-  uint16_t workgroup_size_y;
-
-  /**
-   * Z dimension of work-group, in work-items. Must be greater than
-   * 0. If the grid has 1 or 2 dimensions, the only valid value is 1.
-   */
-  uint16_t workgroup_size_z;
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint16_t reserved0;
-
-  /**
-   * X dimension of grid, in work-items. Must be greater than 0. Must
-   * not be smaller than @a workgroup_size_x.
-   */
-  uint32_t grid_size_x;
-
-  /**
-   * Y dimension of grid, in work-items. Must be greater than 0. If the grid has
-   * 1 dimension, the only valid value is 1. Must not be smaller than @a
-   * workgroup_size_y.
-   */
-  uint32_t grid_size_y;
-
-  /**
-   * Z dimension of grid, in work-items. Must be greater than 0. If the grid has
-   * 1 or 2 dimensions, the only valid value is 1. Must not be smaller than @a
-   * workgroup_size_z.
-   */
-  uint32_t grid_size_z;
-
-  /**
-   * Size in bytes of private memory allocation request (per work-item).
-   */
-  uint32_t private_segment_size;
-
-  /**
-   * Size in bytes of group memory allocation request (per work-group). Must not
-   * be less than the sum of the group memory used by the kernel (and the
-   * functions it calls directly or indirectly) and the dynamically allocated
-   * group segment variables.
-   */
-  uint32_t group_segment_size;
-
-  /**
-   * Opaque handle to a code object that includes an implementation-defined
-   * executable code for the kernel.
-   */
-  uint64_t kernel_object;
-
-#ifdef HSA_LARGE_MODEL
-  void* kernarg_address;
-#elif defined HSA_LITTLE_ENDIAN
-  /**
-   * Pointer to a buffer containing the kernel arguments. May be NULL.
-   *
-   * The buffer must be allocated using ::hsa_memory_allocate, and must not be
-   * modified once the kernel dispatch packet is enqueued until the dispatch has
-   * completed execution.
-   */
-  void* kernarg_address;
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved1;
-#else
-  uint32_t reserved1;
-  void* kernarg_address;
-#endif
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint64_t reserved2;
-
-  /**
-   * Signal used to indicate completion of the job. The application can use the
-   * special signal handle 0 to indicate that no signal is used.
-   */
-  hsa_signal_t completion_signal;
-
-} hsa_kernel_dispatch_packet_t;
-
-/**
- * @brief Agent dispatch packet.
- */
-typedef struct hsa_agent_dispatch_packet_s {
-  /**
-   * Packet header. Used to configure multiple packet parameters such as the
-   * packet type. The parameters are described by ::hsa_packet_header_t.
-   */
-  uint16_t header;
-
-  /**
-   * Application-defined function to be performed by the destination agent.
-   */
-  uint16_t type;
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved0;
-
-#ifdef HSA_LARGE_MODEL
-  void* return_address;
-#elif defined HSA_LITTLE_ENDIAN
-  /**
-   * Address where to store the function return values, if any.
-   */
-  void* return_address;
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved1;
-#else
-  uint32_t reserved1;
-  void* return_address;
-#endif
-
-  /**
-   * Function arguments.
-   */
-  uint64_t arg[4];
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint64_t reserved2;
-
-  /**
-   * Signal used to indicate completion of the job. The application can use the
-   * special signal handle 0 to indicate that no signal is used.
-   */
-  hsa_signal_t completion_signal;
-
-} hsa_agent_dispatch_packet_t;
-
-/**
- * @brief Barrier-AND packet.
- */
-typedef struct hsa_barrier_and_packet_s {
-  /**
-   * Packet header. Used to configure multiple packet parameters such as the
-   * packet type. The parameters are described by ::hsa_packet_header_t.
-   */
-  uint16_t header;
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint16_t reserved0;
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved1;
-
-  /**
-   * Array of dependent signal objects. Signals with a handle value of 0 are
-   * allowed and are interpreted by the packet processor as satisfied
-   * dependencies.
-   */
-  hsa_signal_t dep_signal[5];
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint64_t reserved2;
-
-  /**
-   * Signal used to indicate completion of the job. The application can use the
-   * special signal handle 0 to indicate that no signal is used.
-   */
-  hsa_signal_t completion_signal;
-
-} hsa_barrier_and_packet_t;
-
-/**
- * @brief Barrier-OR packet.
- */
-typedef struct hsa_barrier_or_packet_s {
-  /**
-   * Packet header. Used to configure multiple packet parameters such as the
-   * packet type. The parameters are described by ::hsa_packet_header_t.
-   */
-  uint16_t header;
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint16_t reserved0;
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved1;
-
-  /**
-   * Array of dependent signal objects. Signals with a handle value of 0 are
-   * allowed and are interpreted by the packet processor as dependencies not
-   * satisfied.
-   */
-  hsa_signal_t dep_signal[5];
-
-  /**
-   * Reserved. Must be 0.
-   */
-  uint64_t reserved2;
-
-  /**
-   * Signal used to indicate completion of the job. The application can use the
-   * special signal handle 0 to indicate that no signal is used.
-   */
-  hsa_signal_t completion_signal;
-
-} hsa_barrier_or_packet_t;
-
-/** @} */
-
-/** \addtogroup memory Memory
- *  @{
- */
-
-/**
- * @brief Memory segments associated with a region.
- */
-typedef enum {
-  /**
-   * Global segment. Used to hold data that is shared by all agents.
-   */
-  HSA_REGION_SEGMENT_GLOBAL = 0,
-  /**
-   * Read-only segment. Used to hold data that remains constant during the
-   * execution of a kernel.
-   */
-  HSA_REGION_SEGMENT_READONLY = 1,
-  /**
-   * Private segment. Used to hold data that is local to a single work-item.
-   */
-  HSA_REGION_SEGMENT_PRIVATE = 2,
-  /**
-   * Group segment. Used to hold data that is shared by the work-items of a
-   * work-group.
-  */
-  HSA_REGION_SEGMENT_GROUP = 3,
-  /**
-   * Kernarg segment. Used to store kernel arguments.
-  */
-  HSA_REGION_SEGMENT_KERNARG = 4
-} hsa_region_segment_t;
-
-/**
- * @brief Global region flags.
- */
-typedef enum {
-  /**
-   * The application can use memory in the region to store kernel arguments, and
-   * provide the values for the kernarg segment of a kernel dispatch. If this
-   * flag is set, then ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set.
-   */
-  HSA_REGION_GLOBAL_FLAG_KERNARG = 1,
-  /**
-   * Updates to memory in this region are immediately visible to all the
-   * agents under the terms of the HSA memory model. If this
-   * flag is set, then ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set.
-   */
-  HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2,
-  /**
-   * Updates to memory in this region can be performed by a single agent at
-   * a time. If a different agent in the system is allowed to access the
-   * region, the application must explicitely invoke ::hsa_memory_assign_agent
-   * in order to transfer ownership to that agent for a particular buffer.
-   */
-  HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4
-} hsa_region_global_flag_t;
-
-/**
- * @brief Attributes of a memory region.
- */
-typedef enum {
-  /**
-   * Segment where memory in the region can be used. The type of this
-   * attribute is ::hsa_region_segment_t.
-   */
-  HSA_REGION_INFO_SEGMENT = 0,
-  /**
-   * Flag mask. The value of this attribute is undefined if the value of
-   * ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of
-   * this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t
-   * values.
-   */
-  HSA_REGION_INFO_GLOBAL_FLAGS = 1,
-  /**
-   * Size of this region, in bytes. The type of this attribute is size_t.
-   */
-  HSA_REGION_INFO_SIZE = 2,
-  /**
-   * Maximum allocation size in this region, in bytes. Must not exceed the value
-   * of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t.
-   *
-   * If the region is in the global or readonly segments, this is the maximum
-   * size that the application can pass to ::hsa_memory_allocate.
-   *
-   * If the region is in the group segment, this is the maximum size (per
-   * work-group) that can be requested for a given kernel dispatch. If the
-   * region is in the private segment, this is the maximum size (per work-item)
-   * that can be requested for a specific kernel dispatch, and must be at least
-   * 256 bytes.
-   */
-  HSA_REGION_INFO_ALLOC_MAX_SIZE = 4,
-  /**
-   * Maximum size (per work-group) of private memory that can be requested for a
-   * specific kernel dispatch. Must be at least 65536 bytes. The type of this
-   * attribute is uint32_t. The value of this attribute is undefined if the
-   * region is not in the private segment.
-   */
-  HSA_REGION_INFO_ALLOC_MAX_PRIVATE_WORKGROUP_SIZE = 8,
-  /**
-   * Indicates whether memory in this region can be allocated using
-   * ::hsa_memory_allocate. The type of this attribute is bool.
-   *
-   * The value of this flag is always false for regions in the group and private
-   * segments.
-   */
-  HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5,
-  /**
-   * Allocation granularity of buffers allocated by ::hsa_memory_allocate in
-   * this region. The size of a buffer allocated in this region is a multiple of
-   * the value of this attribute. The value of this attribute is only defined if
-   * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type
-   * of this attribute is size_t.
-   */
-  HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6,
-  /**
-   * Alignment of buffers allocated by ::hsa_memory_allocate in this region. The
-   * value of this attribute is only defined if
-   * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must be
-   * a power of 2. The type of this attribute is size_t.
-   */
-  HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7
-} hsa_region_info_t;
-
-/**
- * @brief Get the current value of an attribute of a region.
- *
- * @param[in] region A valid region.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to a application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * region attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_region_get_info(
-    hsa_region_t region,
-    hsa_region_info_t attribute,
-    void* value);
-
-/**
- * @brief Iterate over the memory regions associated with a given agent, and
- * invoke an application-defined callback on every iteration.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked once per region that is
- * accessible from the agent.  The HSA runtime passes two arguments to the
- * callback, the region and the application data.  If @p callback returns a
- * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
- * traversal stops and ::hsa_agent_iterate_regions returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_agent_iterate_regions(
-    hsa_agent_t agent,
-    hsa_status_t (*callback)(hsa_region_t region, void* data),
-    void* data);
-
-/**
- * @brief Allocate a block of memory in a given region.
- *
- * @param[in] region Region where to allocate memory from. The region must have
- * the ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED flag set.
- *
- * @param[in] size Allocation size, in bytes. Must not be zero. This value is
- * rounded up to the nearest multiple of ::HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE
- * in @p region.
- *
- * @param[out] ptr Pointer to the location where to store the base address of
- * the allocated block. The returned base address is aligned to the value of
- * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT in @p region. If the allocation
- * fails, the returned value is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
- * allocate memory in @p region, or @p size is greater than the value of
- * HSA_REGION_INFO_ALLOC_MAX_SIZE in @p region.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0.
- */
-hsa_status_t HSA_API hsa_memory_allocate(hsa_region_t region,
-    size_t size,
-    void** ptr);
-
-/**
- * @brief Deallocate a block of memory previously allocated using
- * ::hsa_memory_allocate.
- *
- * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
- * previously returned by ::hsa_memory_allocate, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- */
-hsa_status_t HSA_API hsa_memory_free(void* ptr);
-
-/**
- * @brief Copy a block of memory from the location pointed to by @p src to the
- * memory block pointed to by @p dst.
- *
- * @param[out] dst Buffer where the content is to be copied. If @p dst is in
- * coarse-grained memory, the copied data is only visible to the agent currently
- * assigned (::hsa_memory_assign_agent) to @p dst.
- *
- * @param[in] src A valid pointer to the source of data to be copied. The source
- * buffer must not overlap with the destination buffer. If the source buffer is
- * in coarse-grained memory then it must be assigned to an agent, from which the
- * data will be retrieved.
- *
- * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
- * performed and the function returns success. Copying a number of bytes larger
- * than the size of the buffers pointed by @p dst or @p src results in undefined
- * behavior.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
- * pointers are NULL.
- */
-hsa_status_t HSA_API hsa_memory_copy(
-    void *dst,
-    const void *src,
-    size_t size);
-
-/**
- * @brief Change the ownership of a global, coarse-grained buffer.
- *
- * @details The contents of a coarse-grained buffer are visible to an agent
- * only after ownership has been explicitely transferred to that agent. Once the
- * operation completes, the previous owner cannot longer access the data in the
- * buffer.
- *
- * An implementation of the HSA runtime is allowed, but not required, to change
- * the physical location of the buffer when ownership is transferred to a
- * different agent. In general the application must not assume this
- * behavior. The virtual location (address) of the passed buffer is never
- * modified.
- *
- * @param[in] ptr Base address of a global buffer. The pointer must match an
- * address previously returned by ::hsa_memory_allocate. The size of the buffer
- * affected by the ownership change is identical to the size of that previous
- * allocation. If @p ptr points to a fine-grained global buffer, no operation is
- * performed and the function returns success. If @p ptr does not point to
- * global memory, the behavior is undefined.
- *
- * @param[in] agent Agent that becomes the owner of the buffer. The
- * application is responsible for ensuring that @p agent has access to the
- * region that contains the buffer. It is allowed to change ownership to an
- * agent that is already the owner of the buffer, with the same or different
- * access permissions.
- *
- * @param[in] access Access permissions requested for the new owner.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p access is
- * not a valid access value.
- */
-hsa_status_t HSA_API hsa_memory_assign_agent(
-    void *ptr,
-    hsa_agent_t agent,
-    hsa_access_permission_t access);
-
-/**
- *
- * @brief Register a global, fine-grained buffer.
- *
- * @details Registering a buffer serves as an indication to the HSA runtime that
- * the memory might be accessed from a kernel agent other than the
- * host. Registration is a performance hint that allows the HSA runtime
- * implementation to know which buffers will be accessed by some of the kernel
- * agents ahead of time.
- *
- * Registration is only recommended for buffers in the global segment that have
- * not been allocated using the HSA allocator (::hsa_memory_allocate), but an OS
- * allocator instead. Registering an OS-allocated buffer in the base profile is
- * equivalent to a no-op.
- *
- * Registrations should not overlap.
- *
- * @param[in] ptr A buffer in global, fine-grained memory. If a NULL pointer is
- * passed, no operation is performed. If the buffer has been allocated using
- * ::hsa_memory_allocate, or has already been registered, no operation is
- * performed.
- *
- * @param[in] size Requested registration size in bytes. A size of 0 is
- * only allowed if @p ptr is NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 but @p ptr
- * is not NULL.
- */
-hsa_status_t HSA_API hsa_memory_register(
-    void *ptr,
-    size_t size);
-
-/**
- *
- * @brief Deregister memory previously registered using ::hsa_memory_register.
- *
- * @details If the memory interval being deregistered does not match a previous
- * registration (start and end addresses), the behavior is undefined.
- *
- * @param[in] ptr A pointer to the base of the buffer to be deregistered. If
- * a NULL pointer is passed, no operation is performed.
- *
- * @param[in] size Size of the buffer to be deregistered.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- */
-hsa_status_t HSA_API hsa_memory_deregister(
-    void *ptr,
-    size_t size);
-
-/** @} */
-
-
-/** \defgroup instruction-set-architecture Instruction Set Architecture.
- *  @{
- */
-
-/**
- * @brief Instruction set architecture.
- */
-typedef struct hsa_isa_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_isa_t;
-
-/**
- * @brief Retrieve a reference to an instruction set architecture handle out of
- * a symbolic name.
- *
- * @param[in] name Vendor-specific name associated with a a particular
- * instruction set architecture. @p name must start with the vendor name and a
- * colon (for example, "AMD:"). The rest of the name is vendor-specific. Must be
- * a NUL-terminated string.
- *
- * @param[out] isa Memory location where the HSA runtime stores the ISA handle
- * corresponding to the given name. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA_NAME The given name does not
- * correspond to any instruction set architecture.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p name is NULL, or @p isa is
- * NULL.
- */
-hsa_status_t HSA_API hsa_isa_from_name(
-    const char *name,
-    hsa_isa_t *isa);
-
-/**
- * @brief Iterate over the instruction sets supported by the given agent, and
- * invoke an application-defined callback on every iteration. The iterator is
- * deterministic: if an agent supports several instruction set architectures,
- * they are traversed in the same order in every invocation of this function.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked once per instruction set
- * architecture.  The HSA runtime passes two arguments to the callback: the
- * ISA and the application data.  If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * that status value is returned.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_agent_iterate_isas(
-    hsa_agent_t agent,
-    hsa_status_t (*callback)(hsa_isa_t isa, void *data),
-    void *data);
-
-/**
- * @brief Instruction set architecture attributes.
- */
-typedef enum {
-  /**
-   * The length of the ISA name in bytes, not including the NUL terminator. The
-   * type of this attribute is uint32_t.
-   */
-  HSA_ISA_INFO_NAME_LENGTH = 0,
-  /**
-   * Human-readable description.  The type of this attribute is character array
-   * with the length equal to the value of ::HSA_ISA_INFO_NAME_LENGTH attribute.
-   */
-  HSA_ISA_INFO_NAME = 1,
-  /**
-   * @deprecated
-   *
-   * Number of call conventions supported by the instruction set architecture.
-   * Must be greater than zero. The type of this attribute is uint32_t.
-   */
-  HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2,
-  /**
-   * @deprecated
-   *
-   * Number of work-items in a wavefront for a given call convention. Must be a
-   * power of 2 in the range [1,256]. The type of this attribute is uint32_t.
-   */
-  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3,
-  /**
-   * @deprecated
-   *
-   * Number of wavefronts per compute unit for a given call convention. In
-   * practice, other factors (for example, the amount of group memory used by a
-   * work-group) may further limit the number of wavefronts per compute
-   * unit. The type of this attribute is uint32_t.
-   */
-  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4,
-  /**
-   * Machine models supported by the instruction set architecture. The type of
-   * this attribute is a bool[2]. If the ISA supports the small machine model,
-   * the element at index ::HSA_MACHINE_MODEL_SMALL is true. If the ISA supports
-   * the large model, the element at index ::HSA_MACHINE_MODEL_LARGE is true.
-   */
-  HSA_ISA_INFO_MACHINE_MODELS = 5,
-  /**
-   * Profiles supported by the instruction set architecture. The type of this
-   * attribute is a bool[2]. If the ISA supports the base profile, the element
-   * at index ::HSA_PROFILE_BASE is true. If the ISA supports the full profile,
-   * the element at index ::HSA_PROFILE_FULL is true.
-   */
-  HSA_ISA_INFO_PROFILES = 6,
-  /**
-   * Default floating-point rounding modes supported by the instruction set
-   * architecture. The type of this attribute is a bool[3]. The value at a given
-   * index is true if the corresponding rounding mode in
-   * ::hsa_default_float_rounding_mode_t is supported. At least one default mode
-   * has to be supported.
-   *
-   * If the default mode is supported, then
-   * ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES must report that
-   * both the zero and the near roundings modes are supported.
-   */
-  HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES = 7,
-  /**
-   * Default floating-point rounding modes supported by the instruction set
-   * architecture in the Base profile. The type of this attribute is a
-   * bool[3]. The value at a given index is true if the corresponding rounding
-   * mode in ::hsa_default_float_rounding_mode_t is supported. The value at
-   * index HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT must be false.  At least one
-   * of the values at indexes ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO or
-   * HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR must be true.
-   */
-  HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 8,
-  /**
-   * Flag indicating that the f16 HSAIL operation is at least as fast as the
-   * f32 operation in the instruction set architecture. The type of this
-   * attribute is bool.
-   */
-  HSA_ISA_INFO_FAST_F16_OPERATION = 9,
-  /**
-   * Maximum number of work-items of each dimension of a work-group.  Each
-   * maximum must be greater than 0. No maximum can exceed the value of
-   * ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE. The type of this attribute is
-   * uint16_t[3].
-   */
-  HSA_ISA_INFO_WORKGROUP_MAX_DIM = 12,
-  /**
-   * Maximum total number of work-items in a work-group. The type
-   * of this attribute is uint32_t.
-   */
-  HSA_ISA_INFO_WORKGROUP_MAX_SIZE = 13,
-  /**
-   * Maximum number of work-items of each dimension of a grid. Each maximum must
-   * be greater than 0, and must not be smaller than the corresponding value in
-   * ::HSA_ISA_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
-   * ::HSA_ISA_INFO_GRID_MAX_SIZE. The type of this attribute is
-   * ::hsa_dim3_t.
-   */
-  HSA_ISA_INFO_GRID_MAX_DIM = 14,
-  /**
-   * Maximum total number of work-items in a grid. The type of this
-   * attribute is uint64_t.
-   */
-  HSA_ISA_INFO_GRID_MAX_SIZE = 16,
-  /**
-   * Maximum number of fbarriers per work-group. Must be at least 32. The
-   * type of this attribute is uint32_t.
-   */
-  HSA_ISA_INFO_FBARRIER_MAX_SIZE = 17
-} hsa_isa_info_t;
-
-/**
- * @deprecated The concept of call convention has been deprecated. If the
- * application wants to query the value of an attribute for a given instruction
- * set architecture, use ::hsa_isa_get_info_alt instead. If the application
- * wants to query an attribute that is specific to a given combination of ISA
- * and wavefront, use ::hsa_wavefront_get_info.
- *
- * @brief Get the current value of an attribute for a given instruction set
- * architecture (ISA).
- *
- * @param[in] isa A valid instruction set architecture.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[in] index Call convention index. Used only for call convention
- * attributes, otherwise ignored. Must have a value between 0 (inclusive) and
- * the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT (not
- * inclusive) in @p isa.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_INDEX The index is out of range.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * instruction set architecture attribute, or @p value is
- * NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_get_info(
-    hsa_isa_t isa,
-    hsa_isa_info_t attribute,
-    uint32_t index,
-    void *value);
-
-/**
- * @brief Get the current value of an attribute for a given instruction set
- * architecture (ISA).
- *
- * @param[in] isa A valid instruction set architecture.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * instruction set architecture attribute, or @p value is
- * NULL.
- */
-hsa_status_t HSA_API hsa_isa_get_info_alt(
-    hsa_isa_t isa,
-    hsa_isa_info_t attribute,
-    void *value);
-
-/**
- * @brief Retrieve the exception policy support for a given combination of
- * instruction set architecture and profile.
- *
- * @param[in] isa A valid instruction set architecture.
- *
- * @param[in] profile Profile.
- *
- * @param[out] mask Pointer to a memory location where the HSA runtime stores a
- * mask of ::hsa_exception_policy_t values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
- * profile, or @p mask is NULL.
- */
-hsa_status_t HSA_API hsa_isa_get_exception_policies(
-    hsa_isa_t isa,
-    hsa_profile_t profile,
-    uint16_t *mask);
-
-/**
- * @brief Floating-point types.
- */
-typedef enum {
-  /**
-   * 16-bit floating-point type.
-   */
-  HSA_FP_TYPE_16 = 1,
-  /**
-   * 32-bit floating-point type.
-   */
-  HSA_FP_TYPE_32 = 2,
-  /**
-   * 64-bit floating-point type.
-   */
-  HSA_FP_TYPE_64 = 4
-} hsa_fp_type_t;
-
-/**
- * @brief Flush to zero modes.
- */
-typedef enum {
-  /**
-   * Flush to zero.
-   */
-  HSA_FLUSH_MODE_FTZ = 1,
-  /**
-   * Do not flush to zero.
-   */
-  HSA_FLUSH_MODE_NON_FTZ = 2
-} hsa_flush_mode_t;
-
-/**
- * @brief Round methods.
- */
-typedef enum {
-  /**
-   * Single round method.
-   */
-  HSA_ROUND_METHOD_SINGLE = 1,
-  /**
-   * Double round method.
-   */
-  HSA_ROUND_METHOD_DOUBLE = 2
-} hsa_round_method_t;
-
-/**
- * @brief Retrieve the round method (single or double) used to implement the
- * floating-point multiply add instruction (mad) for a given combination of
- * instruction set architecture, floating-point type, and flush to zero
- * modifier.
- *
- * @param[in] isa Instruction set architecture.
- *
- * @param[in] fp_type Floating-point type.
- *
- * @param[in] flush_mode Flush to zero modifier.
- *
- * @param[out] round_method Pointer to a memory location where the HSA
- * runtime stores the round method used by the implementation. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p fp_type is not a valid
- * floating-point type, or @p flush_mode is not a valid flush to zero modifier,
- * or @p round_method is NULL.
- */
-hsa_status_t HSA_API hsa_isa_get_round_method(
-    hsa_isa_t isa,
-    hsa_fp_type_t fp_type,
-    hsa_flush_mode_t flush_mode,
-    hsa_round_method_t *round_method);
-
-/**
- * @brief Wavefront handle
- */
-typedef struct hsa_wavefront_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_wavefront_t;
-
-/**
- * @brief Wavefront attributes.
- */
-typedef enum {
-  /**
-   * Number of work-items in the wavefront. Must be a power of 2 in the range
-   * [1,256]. The type of this attribute is uint32_t.
-   */
-  HSA_WAVEFRONT_INFO_SIZE = 0
-} hsa_wavefront_info_t;
-
-/**
- * @brief Get the current value of a wavefront attribute.
- *
- * @param[in] wavefront A wavefront.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_WAVEFRONT The wavefront is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * wavefront attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_wavefront_get_info(
-    hsa_wavefront_t wavefront,
-    hsa_wavefront_info_t attribute,
-    void *value);
-
-/**
- * @brief Iterate over the different wavefronts supported by an instruction set
- * architecture, and invoke an application-defined callback on every iteration.
- *
- * @param[in] isa Instruction set architecture.
- *
- * @param[in] callback Callback to be invoked once per wavefront that is
- * supported by the agent. The HSA runtime passes two arguments to the callback:
- * the wavefront handle and the application data.  If @p callback returns a
- * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
- * traversal stops and that value is returned.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_isa_iterate_wavefronts(
-    hsa_isa_t isa,
-    hsa_status_t (*callback)(hsa_wavefront_t wavefront, void *data),
-    void *data);
-
-/**
- * @deprecated Use ::hsa_agent_iterate_isas to query which instructions set
- * architectures are supported by a given agent.
- *
- * @brief Check if the instruction set architecture of a code object can be
- * executed on an agent associated with another architecture.
- *
- * @param[in] code_object_isa Instruction set architecture associated with a
- * code object.
- *
- * @param[in] agent_isa Instruction set architecture associated with an agent.
- *
- * @param[out] result Pointer to a memory location where the HSA runtime stores
- * the result of the check. If the two architectures are compatible, the result
- * is true; if they are incompatible, the result is false.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p code_object_isa or @p agent_isa are
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_compatible(
-    hsa_isa_t code_object_isa,
-    hsa_isa_t agent_isa,
-    bool *result);
-
-/** @} */
-
-
-/** \defgroup executable Executable
- *  @{
- */
-
-/**
- * @brief Code object reader handle. A code object reader is used to
- * load a code object from file (when created using
- * ::hsa_code_object_reader_create_from_file), or from memory (if created using
- * ::hsa_code_object_reader_create_from_memory).
- */
-typedef struct hsa_code_object_reader_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_code_object_reader_t;
-
-/**
- * @brief Create a code object reader to operate on a file.
- *
- * @param[in] file File descriptor. The file must have been opened by
- * application with at least read permissions prior calling this function. The
- * file must contain a vendor-specific code object.
- *
- * The file is owned and managed by the application; the lifetime of the file
- * descriptor must exceed that of any associated code object reader.
- *
- * @param[out] code_object_reader Memory location to store the newly created
- * code object reader handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
- */
-hsa_status_t HSA_API hsa_code_object_reader_create_from_file(
-    hsa_file_t file,
-    hsa_code_object_reader_t *code_object_reader);
-
-/**
- * @brief Create a code object reader to operate on memory.
- *
- * @param[in] code_object Memory buffer that contains a vendor-specific code
- * object. The buffer is owned and managed by the application; the lifetime of
- * the buffer must exceed that of any associated code object reader.
- *
- * @param[in] size Size of the buffer pointed to by @p code_object. Must not be
- * 0.
- *
- * @param[out] code_object_reader Memory location to store newly created code
- * object reader handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object is NULL, @p size
- * is zero, or @p code_object_reader is NULL.
- */
-hsa_status_t HSA_API hsa_code_object_reader_create_from_memory(
-    const void *code_object,
-    size_t size,
-    hsa_code_object_reader_t *code_object_reader);
-
-/**
- * @brief Destroy a code object reader.
- *
- * @details The code object reader handle becomes invalid after completion of
- * this function. Any file or memory used to create the code object read is not
- * closed, removed, or deallocated by this function.
- *
- * @param[in] code_object_reader Code object reader to destroy.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
- * is invalid.
- */
-hsa_status_t HSA_API hsa_code_object_reader_destroy(
-    hsa_code_object_reader_t code_object_reader);
-
-/**
- * @brief Struct containing an opaque handle to an executable, which contains
- * ISA for finalized kernels and indirect functions together with the allocated
- * global or readonly segment variables they reference.
- */
-typedef struct hsa_executable_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_executable_t;
-
-/**
- * @brief Executable state.
- */
-typedef enum {
-  /**
-   * Executable state, which allows the user to load code objects and define
-   * external variables. Variable addresses, kernel code handles, and
-   * indirect function code handles are not available in query operations until
-   * the executable is frozen (zero always returned).
-   */
-  HSA_EXECUTABLE_STATE_UNFROZEN = 0,
-  /**
-   * Executable state, which allows the user to query variable addresses,
-   * kernel code handles, and indirect function code handles using query
-   * operations. Loading new code objects, as well as defining external
-   * variables, is not allowed in this state.
-   */
-  HSA_EXECUTABLE_STATE_FROZEN = 1
-} hsa_executable_state_t;
-
-/**
- * @deprecated Use ::hsa_executable_create_alt instead, which allows the
- * application to specify the default floating-point rounding mode of the
- * executable and assumes an unfrozen initial state.
- *
- * @brief Create an empty executable.
- *
- * @param[in] profile Profile used in the executable.
- *
- * @param[in] executable_state Executable state. If the state is
- * ::HSA_EXECUTABLE_STATE_FROZEN, the resulting executable is useless because no
- * code objects can be loaded, and no variables can be defined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] executable Memory location where the HSA runtime stores the newly
- * created executable handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
- * @p executable is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_create(
-    hsa_profile_t profile,
-    hsa_executable_state_t executable_state,
-    const char *options,
-    hsa_executable_t *executable);
-
-/**
- * @brief Create an empty executable.
- *
- * @param[in] profile Profile used in the executable.
- *
- * @param[in] default_float_rounding_mode Default floating-point rounding mode
- * used in the executable. Allowed rounding modes are near and zero (default is
- * not allowed).
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] executable Memory location where the HSA runtime stores newly
- * created executable handle. The initial state of the executable is
- * ::HSA_EXECUTABLE_STATE_UNFROZEN.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
- * @p executable is NULL.
- */
-hsa_status_t HSA_API hsa_executable_create_alt(
-    hsa_profile_t profile,
-    hsa_default_float_rounding_mode_t default_float_rounding_mode,
-    const char *options,
-    hsa_executable_t *executable);
-
-/**
- * @brief Destroy an executable.
- *
- * @details An executable handle becomes invalid after the executable has been
- * destroyed. Code object handles that were loaded into this executable are
- * still valid after the executable has been destroyed, and can be used as
- * intended. Resources allocated outside and associated with this executable
- * (such as external global or readonly variables) can be released after the
- * executable has been destroyed.
- *
- * Executable should not be destroyed while kernels are in flight.
- *
- * @param[in] executable Executable.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- */
-hsa_status_t HSA_API hsa_executable_destroy(
-    hsa_executable_t executable);
-
-/**
- * @brief Loaded code object handle.
- */
-typedef struct hsa_loaded_code_object_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_loaded_code_object_t;
-
-/**
- * @brief Load a program code object into an executable.
- *
- * @details A program code object contains information about resources that are
- * accessible by all kernel agents that run the executable, and can be loaded
- * at most once into an executable.
- *
- * If the program code object uses extensions, the implementation must support
- * them for this operation to return successfully.
- *
- * @param[in] executable Executable.
- *
- * @param[in] code_object_reader A code object reader that holds the program
- * code object to load. If a code object reader is destroyed before all the
- * associated executables are destroyed, the behavior is undefined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] loaded_code_object Pointer to a memory location where the HSA
- * runtime stores the loaded code object handle. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
- * is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The program code object is
- * not compatible with the executable or the implementation (for example, the
- * code object uses an extension that is not supported by the implementation).
- */
-hsa_status_t HSA_API hsa_executable_load_program_code_object(
-    hsa_executable_t executable,
-    hsa_code_object_reader_t code_object_reader,
-    const char *options,
-    hsa_loaded_code_object_t *loaded_code_object);
-
-/**
- * @brief Load an agent code object into an executable.
- *
- * @details The agent code object contains all defined agent
- * allocation variables, functions, indirect functions, and kernels in a given
- * program for a given instruction set architecture.
- *
- * Any module linkage declaration must have been defined either by a define
- * variable or by loading a code object that has a symbol with module linkage
- * definition.
- *
- * The default floating-point rounding mode of the code object associated with
- * @p code_object_reader must match that of the executable
- * (::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE), or be default (in which
- * case the value of ::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE is used).
- * If the agent code object uses extensions, the implementation and the agent
- * must support them for this operation to return successfully.
- *
- * @param[in] executable Executable.
- *
- * @param[in] agent Agent to load code object for. A code object can be loaded
- * into an executable at most once for a given agent. The instruction set
- * architecture of the code object must be supported by the agent.
- *
- * @param[in] code_object_reader A code object reader that holds the code object
- * to load. If a code object reader is destroyed before all the associated
- * executables are destroyed, the behavior is undefined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] loaded_code_object Pointer to a memory location where the HSA
- * runtime stores the loaded code object handle. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
- * is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The code object read by @p
- * code_object_reader is not compatible with the agent (for example, the agent
- * does not support the instruction set architecture of the code object), the
- * executable (for example, there is a default floating-point mode mismatch
- * between the two), or the implementation.
- */
-hsa_status_t HSA_API hsa_executable_load_agent_code_object(
-    hsa_executable_t executable,
-    hsa_agent_t agent,
-    hsa_code_object_reader_t code_object_reader,
-    const char *options,
-    hsa_loaded_code_object_t *loaded_code_object);
-
-/**
- * @brief Freeze the executable.
- *
- * @details No modifications to executable can be made after freezing: no code
- * objects can be loaded to the executable, and no external variables can be
- * defined. Freezing the executable does not prevent querying the executable's
- * attributes. The application must define all the external variables in an
- * executable before freezing it.
- *
- * @param[in] executable Executable.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_UNDEFINED One or more variables are
- * undefined in the executable.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is already frozen.
- */
-hsa_status_t HSA_API hsa_executable_freeze(
-    hsa_executable_t executable,
-    const char *options);
-
-/**
- * @brief Executable attributes.
- */
-typedef enum {
-  /**
-   * Profile this executable is created for. The type of this attribute is
-   * ::hsa_profile_t.
-   */
-  HSA_EXECUTABLE_INFO_PROFILE = 1,
-  /**
-   * Executable state. The type of this attribute is ::hsa_executable_state_t.
-   */
-  HSA_EXECUTABLE_INFO_STATE = 2,
-  /**
-   * Default floating-point rounding mode specified when executable was created.
-   * The type of this attribute is ::hsa_default_float_rounding_mode_t.
-   */
-  HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 3
-} hsa_executable_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given executable.
- *
- * @param[in] executable Executable.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * executable attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_executable_get_info(
-    hsa_executable_t executable,
-    hsa_executable_info_t attribute,
-    void *value);
-
-/**
- * @brief Define an external global variable with program allocation.
- *
- * @details This function allows the application to provide the definition
- * of a variable in the global segment memory with program allocation. The
- * variable must be defined before loading a code object into an executable.
- * In addition, code objects loaded must not define the variable.
- *
- * @param[in] executable Executable. Must not be in frozen state.
- *
- * @param[in] variable_name Name of the variable. The Programmer's Reference
- * Manual describes the standard name mangling scheme.
- *
- * @param[in] address Address where the variable is defined. This address must
- * be in global memory and can be read and written by any agent in the
- * system. The application cannot deallocate the buffer pointed by @p address
- * before @p executable is destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
- * already defined.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
- * @p variable_name.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
- */
-hsa_status_t HSA_API hsa_executable_global_variable_define(
-    hsa_executable_t executable,
-    const char *variable_name,
-    void *address);
-
-/**
- * @brief Define an external global variable with agent allocation.
- *
- * @details This function allows the application to provide the definition
- * of a variable in the global segment memory with agent allocation. The
- * variable must be defined before loading a code object into an executable.
- * In addition, code objects loaded must not define the variable.
- *
- * @param[in] executable Executable. Must not be in frozen state.
- *
- * @param[in] agent Agent for which the variable is being defined.
- *
- * @param[in] variable_name Name of the variable. The Programmer's Reference
- * Manual describes the standard name mangling scheme.
- *
- * @param[in] address Address where the variable is defined. This address must
- * have been previously allocated using ::hsa_memory_allocate in a global region
- * that is only visible to @p agent. The application cannot deallocate the
- * buffer pointed by @p address before @p executable is destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
- * already defined.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
- * @p variable_name.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
- */
-hsa_status_t HSA_API hsa_executable_agent_global_variable_define(
-    hsa_executable_t executable,
-    hsa_agent_t agent,
-    const char *variable_name,
-    void *address);
-
-/**
- * @brief Define an external readonly variable.
- *
- * @details This function allows the application to provide the definition
- * of a variable in the readonly segment memory. The variable must be defined
- * before loading a code object into an executable. In addition, code objects
- * loaded must not define the variable.
- *
- * @param[in] executable Executable. Must not be in frozen state.
- *
- * @param[in] agent Agent for which the variable is being defined.
- *
- * @param[in] variable_name Name of the variable. The Programmer's Reference
- * Manual describes the standard name mangling scheme.
- *
- * @param[in] address Address where the variable is defined. This address must
- * have been previously allocated using ::hsa_memory_allocate in a readonly
- * region associated with @p agent. The buffer pointed to by @p address is
- * owned by the application, which cannot deallocate it before @p executable
- * is destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE Executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
- * already defined.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
- * @p variable_name.
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
- */
-hsa_status_t HSA_API hsa_executable_readonly_variable_define(
-    hsa_executable_t executable,
-    hsa_agent_t agent,
-    const char *variable_name,
-    void *address);
-
-/**
- * @brief Validate an executable. Checks that all code objects have matching
- * machine model, profile, and default floating-point rounding mode. Checks that
- * all declarations have definitions. Checks declaration-definition
- * compatibility (see the HSA Programming Reference Manual for compatibility
- * rules). Invoking this function is equivalent to invoking
- * ::hsa_executable_validate_alt with no options.
- *
- * @param[in] executable Executable. Must be in frozen state.
- *
- * @param[out] result Memory location where the HSA runtime stores the
- * validation result. If the executable passes validation, the result is 0.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API hsa_executable_validate(
-    hsa_executable_t executable,
-    uint32_t *result);
-
-/**
- * @brief Validate an executable. Checks that all code objects have matching
- * machine model, profile, and default floating-point rounding mode. Checks that
- * all declarations have definitions. Checks declaration-definition
- * compatibility (see the HSA Programming Reference Manual for compatibility
- * rules).
- *
- * @param[in] executable Executable. Must be in frozen state.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] result Memory location where the HSA runtime stores the
- * validation result. If the executable passes validation, the result is 0.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API hsa_executable_validate_alt(
-    hsa_executable_t executable,
-    const char *options,
-    uint32_t *result);
-
-/**
- * @brief Executable symbol handle.
- *
- * The lifetime of an executable object symbol matches that of the executable
- * associated with it. An operation on a symbol whose associated executable has
- * been destroyed results in undefined behavior.
- */
-typedef struct hsa_executable_symbol_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_executable_symbol_t;
-
-/**
- * @deprecated Use ::hsa_executable_get_symbol_by_name instead.
- *
- * @brief Get the symbol handle for a given symbol name.
- *
- * @param[in] executable Executable.
- *
- * @param[in] module_name Module name. Must be NULL if the symbol has
- * program linkage.
- *
- * @param[in] symbol_name Symbol name.
- *
- * @param[in] agent Agent associated with the symbol. If the symbol is
- * independent of any agent (for example, a variable with program
- * allocation), this argument is ignored.
- *
- * @param[in] call_convention Call convention associated with the symbol. If the
- * symbol does not correspond to an indirect function, this argument is ignored.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
- * @p symbol is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_get_symbol(
-    hsa_executable_t executable,
-    const char *module_name,
-    const char *symbol_name,
-    hsa_agent_t agent,
-    int32_t call_convention,
-    hsa_executable_symbol_t *symbol);
-
-/**
- * @brief Retrieve the symbol handle corresponding to a given symbol name.
- *
- * @param[in] executable Executable.
- *
- * @param[in] symbol_name Symbol name. Must be a NUL-terminated character
- * array. The Programmer's Reference Manual describes the standard name mangling
- * scheme.
- *
- * @param[in] agent Pointer to the agent for which the symbol with the given
- * name is defined. If the symbol corresponding to the given name has program
- * allocation, @p agent must be NULL.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or @p
- * symbol is NULL.
- */
-hsa_status_t HSA_API hsa_executable_get_symbol_by_name(
-    hsa_executable_t executable,
-    const char *symbol_name,
-    const hsa_agent_t *agent,
-    hsa_executable_symbol_t *symbol);
-
-/**
- * @brief Symbol type.
- */
-typedef enum {
-  /**
-   * Variable.
-   */
-  HSA_SYMBOL_KIND_VARIABLE = 0,
-  /**
-   * Kernel.
-   */
-  HSA_SYMBOL_KIND_KERNEL = 1,
-  /**
-   * Indirect function.
-   */
-  HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2
-} hsa_symbol_kind_t;
-
-/**
- * @brief Linkage type of a symbol.
- */
-typedef enum {
-  /**
-   * Module linkage.
-   */
-  HSA_SYMBOL_LINKAGE_MODULE = 0,
-  /**
-   * Program linkage.
-   */
-  HSA_SYMBOL_LINKAGE_PROGRAM = 1
-} hsa_symbol_linkage_t;
-
-/**
- * @brief Allocation type of a variable.
- */
-typedef enum {
-  /**
-   * Agent allocation.
-   */
-  HSA_VARIABLE_ALLOCATION_AGENT = 0,
-  /**
-   * Program allocation.
-   */
-  HSA_VARIABLE_ALLOCATION_PROGRAM = 1
-} hsa_variable_allocation_t;
-
-/**
- * @brief Memory segment associated with a variable.
- */
-typedef enum {
-  /**
-   * Global memory segment.
-   */
-  HSA_VARIABLE_SEGMENT_GLOBAL = 0,
-  /**
-   * Readonly memory segment.
-   */
-  HSA_VARIABLE_SEGMENT_READONLY = 1
-} hsa_variable_segment_t;
-
-/**
- * @brief Executable symbol attributes.
- */
-typedef enum {
-  /**
-   * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0,
-  /**
-   * The length of the symbol name in bytes, not including the NUL terminator.
-   * The type of this attribute is uint32_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1,
-  /**
-   * The name of the symbol. The type of this attribute is character array with
-   * the length equal to the value of ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH
-   * attribute.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2,
-  /**
-   * @deprecated
-   *
-   * The length of the module name in bytes (not including the NUL terminator)
-   * to which this symbol belongs if this symbol has module linkage, otherwise 0
-   * is returned. The type of this attribute is uint32_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
-  /**
-   * @deprecated
-   *
-   * The module name to which this symbol belongs if this symbol has module
-   * linkage, otherwise an empty string is returned. The type of this attribute
-   * is character array with the length equal to the value of
-   * ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4,
-  /**
-   * @deprecated
-   *
-   * Agent associated with this symbol. If the symbol is a variable, the
-   * value of this attribute is only defined if
-   * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is
-   * ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is hsa_agent_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20,
-  /**
-   * The address of the variable. The value of this attribute is undefined if
-   * the symbol is not a variable. The type of this attribute is uint64_t.
-   *
-   * If executable's state is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 is
-   * returned.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21,
-  /**
-   * The linkage kind of the symbol. The type of this attribute is
-   * ::hsa_symbol_linkage_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5,
-  /**
-   * Indicates whether the symbol corresponds to a definition. The type of this
-   * attribute is bool.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17,
-  /**
-   * @deprecated
-   *
-   * The allocation kind of the variable. The value of this attribute is
-   * undefined if the symbol is not a variable.  The type of this attribute is
-   * ::hsa_variable_allocation_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
-  /**
-   * @deprecated
-   *
-   * The segment kind of the variable. The value of this attribute is undefined
-   * if the symbol is not a variable. The type of this attribute is
-   * ::hsa_variable_segment_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
-  /**
-   * @deprecated
-   *
-   * Alignment of the symbol in memory. The value of this attribute is undefined
-   * if the symbol is not a variable. The type of this attribute is uint32_t.
-   *
-   * The current alignment of the variable in memory may be greater than the
-   * value specified in the source program variable declaration.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
-  /**
-   * @deprecated
-   *
-   * Size of the variable. The value of this attribute is undefined if
-   * the symbol is not a variable. The type of this attribute is uint32_t.
-   *
-   * A value of 0 is returned if the variable is an external variable and has an
-   * unknown dimension.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9,
-  /**
-   * @deprecated
-   *
-   * Indicates whether the variable is constant. The value of this attribute is
-   * undefined if the symbol is not a variable. The type of this attribute is
-   * bool.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
-  /**
-   * Kernel object handle, used in the kernel dispatch packet. The value of this
-   * attribute is undefined if the symbol is not a kernel. The type of this
-   * attribute is uint64_t.
-   *
-   * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
-   * is returned.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22,
-  /**
-   * Size of kernarg segment memory that is required to hold the values of the
-   * kernel arguments, in bytes. Must be a multiple of 16. The value of this
-   * attribute is undefined if the symbol is not a kernel. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
-  /**
-   * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
-   * which is the maximum of 16 and the maximum alignment of any of the kernel
-   * arguments. The value of this attribute is undefined if the symbol is not a
-   * kernel. The type of this attribute is uint32_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
-  /**
-   * Size of static group segment memory required by the kernel (per
-   * work-group), in bytes. The value of this attribute is undefined
-   * if the symbol is not a kernel. The type of this attribute is uint32_t.
-   *
-   * The reported amount does not include any dynamically allocated group
-   * segment memory that may be requested by the application when a kernel is
-   * dispatched.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
-  /**
-   * Size of static private, spill, and arg segment memory required by
-   * this kernel (per work-item), in bytes. The value of this attribute is
-   * undefined if the symbol is not a kernel. The type of this attribute is
-   * uint32_t.
-   *
-   * If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is
-   * true, the kernel may use more private memory than the reported value, and
-   * the application must add the dynamic call stack usage to @a
-   * private_segment_size when populating a kernel dispatch packet.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
-  /**
-   * Dynamic callstack flag. The value of this attribute is undefined if the
-   * symbol is not a kernel. The type of this attribute is bool.
-   *
-   * If this flag is set (the value is true), the kernel uses a dynamically
-   * sized call stack. This can happen if recursive calls, calls to indirect
-   * functions, or the HSAIL alloca instruction are present in the kernel.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
-  /**
-   * @deprecated
-   *
-   * Call convention of the kernel. The value of this attribute is undefined if
-   * the symbol is not a kernel. The type of this attribute is uint32_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18,
-  /**
-   * Indirect function object handle. The value of this attribute is undefined
-   * if the symbol is not an indirect function, or the associated agent does
-   * not support the Full Profile. The type of this attribute depends on the
-   * machine model: the type is uint32_t for small machine model, and uint64_t
-   * for large model.
-   *
-   * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
-   * is returned.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23,
-  /**
-   * @deprecated
-   *
-   * Call convention of the indirect function. The value of this attribute is
-   * undefined if the symbol is not an indirect function, or the associated
-   * agent does not support the Full Profile. The type of this attribute is
-   * uint32_t.
-   */
-  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
-} hsa_executable_symbol_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given executable symbol.
- *
- * @param[in] executable_symbol Executable symbol.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL The executable symbol is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * executable symbol attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_executable_symbol_get_info(
-    hsa_executable_symbol_t executable_symbol,
-    hsa_executable_symbol_info_t attribute,
-    void *value);
-
-/**
- * @deprecated
- *
- * @brief Iterate over the symbols in an executable, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] callback Callback to be invoked once per executable symbol. The
- * HSA runtime passes three arguments to the callback: the executable, a symbol,
- * and the application data.  If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_executable_iterate_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_iterate_symbols(
-    hsa_executable_t executable,
-    hsa_status_t (*callback)(hsa_executable_t exec,
-                             hsa_executable_symbol_t symbol,
-                             void *data),
-    void *data);
-
-/**
- * @brief Iterate over the kernels, indirect functions, and agent allocation
- * variables in an executable for a given agent, and invoke an application-
- * defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] agent Agent.
- *
- * @param[in] callback Callback to be invoked once per executable symbol. The
- * HSA runtime passes four arguments to the callback: the executable, the
- * agent, a symbol, and the application data. If @p callback returns a status
- * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
- * stops and ::hsa_executable_iterate_agent_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_executable_iterate_agent_symbols(
-    hsa_executable_t executable,
-    hsa_agent_t agent,
-    hsa_status_t (*callback)(hsa_executable_t exec,
-                             hsa_agent_t agent,
-                             hsa_executable_symbol_t symbol,
-                             void *data),
-    void *data);
-
-/**
- * @brief Iterate over the program allocation variables in an executable, and
- * invoke an application-defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] callback Callback to be invoked once per executable symbol. The
- * HSA runtime passes three arguments to the callback: the executable, a symbol,
- * and the application data.  If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_executable_iterate_program_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_executable_iterate_program_symbols(
-    hsa_executable_t executable,
-    hsa_status_t (*callback)(hsa_executable_t exec,
-                             hsa_executable_symbol_t symbol,
-                             void *data),
-    void *data);
-
-/** @} */
-
-
-/** \defgroup code-object Code Objects (deprecated).
- *  @{
- */
-
-/**
- * @deprecated
- *
- * @brief Struct containing an opaque handle to a code object, which contains
- * ISA for finalized kernels and indirect functions together with information
- * about the global or readonly segment variables they reference.
- */
-typedef struct hsa_code_object_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_code_object_t;
-
-/**
- * @deprecated
- *
- * @brief Application data handle that is passed to the serialization
- * and deserialization functions.
- */
-typedef struct hsa_callback_data_s {
-  /**
-   * Opaque handle.
-   */
-  uint64_t handle;
-} hsa_callback_data_t;
-
-/**
- * @deprecated
- *
- * @brief Serialize a code object. Can be used for offline finalization,
- * install-time finalization, disk code caching, etc.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] alloc_callback Callback function for memory allocation. Must not
- * be NULL. The HSA runtime passes three arguments to the callback: the
- * allocation size, the application data, and a pointer to a memory location
- * where the application stores the allocation result. The HSA runtime invokes
- * @p alloc_callback once to allocate a buffer that contains the serialized
- * version of @p code_object.  If the callback returns a status code other than
- * ::HSA_STATUS_SUCCESS, this function returns the same code.
- *
- * @param[in] callback_data Application data that is passed to @p
- * alloc_callback. May be NULL.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] serialized_code_object Memory location where the HSA runtime
- * stores a pointer to the serialized code object. Must not be NULL.
- *
- * @param[out] serialized_code_object_size Memory location where the HSA runtime
- * stores the size (in bytes) of @p serialized_code_object. The returned value
- * matches the allocation size passed by the HSA runtime to @p
- * alloc_callback. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p alloc_callback, @p
- * serialized_code_object, or @p serialized_code_object_size are NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_serialize(
-    hsa_code_object_t code_object,
-    hsa_status_t (*alloc_callback)(size_t size,
-                                   hsa_callback_data_t data,
-                                   void **address),
-    hsa_callback_data_t callback_data,
-    const char *options,
-    void **serialized_code_object,
-    size_t *serialized_code_object_size);
-
-/**
- * @deprecated
- *
- * @brief Deserialize a code object.
- *
- * @param[in] serialized_code_object A serialized code object. Must not be NULL.
- *
- * @param[in] serialized_code_object_size The size (in bytes) of @p
- * serialized_code_object. Must not be 0.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @param[out] code_object Memory location where the HSA runtime stores the
- * deserialized code object.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p serialized_code_object, or @p
- * code_object are NULL, or @p serialized_code_object_size is 0.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_deserialize(
-    void *serialized_code_object,
-    size_t serialized_code_object_size,
-    const char *options,
-    hsa_code_object_t *code_object);
-
-/**
- * @deprecated
- *
- * @brief Destroy a code object.
- *
- * @details The lifetime of a code object must exceed that of any executable
- * where it has been loaded. If an executable that loaded @p code_object has not
- * been destroyed, the behavior is undefined.
- *
- * @param[in] code_object Code object. The handle becomes invalid after it has
- * been destroyed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_destroy(
-    hsa_code_object_t code_object);
-
-/**
- * @deprecated
- *
- * @brief Code object type.
- */
-typedef enum {
-  /**
-   * Produces code object that contains ISA for all kernels and indirect
-   * functions in HSA source.
-   */
-  HSA_CODE_OBJECT_TYPE_PROGRAM = 0
-} hsa_code_object_type_t;
-
-/**
- * @deprecated
- *
- * @brief Code object attributes.
- */
-typedef enum {
-  /**
-   * The version of the code object. The type of this attribute is a
-   * NUL-terminated char[64]. The name must be at most 63 characters long (not
-   * including the NUL terminator) and all array elements not used for the name
-   * must be NUL.
-   */
-  HSA_CODE_OBJECT_INFO_VERSION = 0,
-  /**
-   * Type of code object. The type of this attribute is
-   * ::hsa_code_object_type_t.
-   */
-  HSA_CODE_OBJECT_INFO_TYPE = 1,
-  /**
-   * Instruction set architecture this code object is produced for. The type of
-   * this attribute is ::hsa_isa_t.
-   */
-  HSA_CODE_OBJECT_INFO_ISA = 2,
-  /**
-   * Machine model this code object is produced for. The type of this attribute
-   * is ::hsa_machine_model_t.
-   */
-  HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3,
-  /**
-   * Profile this code object is produced for. The type of this attribute is
-   * ::hsa_profile_t.
-   */
-  HSA_CODE_OBJECT_INFO_PROFILE = 4,
-  /**
-   * Default floating-point rounding mode used when the code object is
-   * produced. The type of this attribute is
-   * ::hsa_default_float_rounding_mode_t.
-   */
-  HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5
-} hsa_code_object_info_t;
-
-/**
- * @deprecated
- *
- * @brief Get the current value of an attribute for a given code object.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * code object attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_info(
-    hsa_code_object_t code_object,
-    hsa_code_object_info_t attribute,
-    void *value);
-
-/**
- * @deprecated
- *
- * @brief Load code object into the executable.
- *
- * @details Every global or readonly variable that is external must be defined
- * before loading the code object. An internal global or readonly variable is
- * allocated once the code object, that is being loaded, references this
- * variable and this variable is not allocated.
- *
- * Any module linkage declaration must have been defined either by a define
- * variable or by loading a code object that has a symbol with module linkage
- * definition.
- *
- * @param[in] executable Executable.
- *
- * @param[in] agent Agent to load code object for. The agent must support the
- * default floating-point rounding mode used by @p code_object.
- *
- * @param[in] code_object Code object to load.  The lifetime of the code object
- * must exceed that of the executable: if @p code_object is destroyed before @p
- * executable, the behavior is undefined.
- *
- * @param[in] options Standard and vendor-specific options. Unknown options are
- * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
- * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
- * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
- * NUL-terminated string. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p agent is not compatible
- * with @p code_object (for example, @p agent does not support the default
- * floating-point rounding mode specified by @p code_object), or @p code_object
- * is not compatible with @p executable (for example, @p code_object and @p
- * executable have different machine models or profiles).
- *
- * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_load_code_object(
-    hsa_executable_t executable,
-    hsa_agent_t agent,
-    hsa_code_object_t code_object,
-    const char *options);
-
-/**
- * @deprecated
- *
- * @brief Code object symbol handle.
- *
- * The lifetime of a code object symbol matches that of the code object
- * associated with it. An operation on a symbol whose associated code object has
- * been destroyed results in undefined behavior.
- */
-typedef struct hsa_code_symbol_s {
-  /**
-   * Opaque handle. Two handles reference the same object of the enclosing type
-   * if and only if they are equal.
-   */
-  uint64_t handle;
-} hsa_code_symbol_t;
-
-/**
- * @deprecated
- *
- * @brief Get the symbol handle within a code object for a given a symbol name.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] symbol_name Symbol name.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
- * @p symbol is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol(
-    hsa_code_object_t code_object,
-    const char *symbol_name,
-    hsa_code_symbol_t *symbol);
-
-/**
- * @deprecated
- *
- * @brief Get the symbol handle within a code object for a given a symbol name.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] module_name Module name. Must be NULL if the symbol has
- * program linkage.
- *
- * @param[in] symbol_name Symbol name.
- *
- * @param[out] symbol Memory location where the HSA runtime stores the symbol
- * handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
- * that matches @p symbol_name.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
- * @p symbol is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol_from_name(
-    hsa_code_object_t code_object,
-    const char *module_name,
-    const char *symbol_name,
-    hsa_code_symbol_t *symbol);
-
-/**
- * @deprecated
- *
- * @brief Code object symbol attributes.
- */
-typedef enum {
-  /**
-   * The type of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
-   */
-  HSA_CODE_SYMBOL_INFO_TYPE = 0,
-  /**
-   * The length of the symbol name in bytes, not including the NUL terminator.
-   * The type of this attribute is uint32_t.
-   */
-  HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1,
-  /**
-   * The name of the symbol. The type of this attribute is character array with
-   * the length equal to the value of ::HSA_CODE_SYMBOL_INFO_NAME_LENGTH
-   * attribute.
-   */
-  HSA_CODE_SYMBOL_INFO_NAME = 2,
-  /**
-   * The length of the module name in bytes (not including the NUL terminator)
-   * to which this symbol belongs if this symbol has module linkage, otherwise 0
-   * is returned. The type of this attribute is uint32_t.
-   */
-  HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
-  /**
-   * The module name to which this symbol belongs if this symbol has module
-   * linkage, otherwise an empty string is returned. The type of this attribute
-   * is character array with the length equal to the value of
-   * ::HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
-   */
-  HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4,
-  /**
-   * The linkage kind of the symbol. The type of this attribute is
-   * ::hsa_symbol_linkage_t.
-   */
-  HSA_CODE_SYMBOL_INFO_LINKAGE = 5,
-  /**
-   * Indicates whether the symbol corresponds to a definition. The type of this
-   * attribute is bool.
-   */
-  HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17,
-  /**
-   * The allocation kind of the variable. The value of this attribute is
-   * undefined if the symbol is not a variable. The type of this attribute is
-   * ::hsa_variable_allocation_t.
-   */
-  HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
-  /**
-   * The segment kind of the variable. The value of this attribute is
-   * undefined if the symbol is not a variable. The type of this attribute is
-   * ::hsa_variable_segment_t.
-   */
-  HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
-  /**
-   * Alignment of the symbol in memory. The value of this attribute is undefined
-   * if the symbol is not a variable. The type of this attribute is uint32_t.
-   *
-   * The current alignment of the variable in memory may be greater than the
-   * value specified in the source program variable declaration.
-   */
-  HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
-  /**
-   * Size of the variable. The value of this attribute is undefined if the
-   * symbol is not a variable. The type of this attribute is uint32_t.
-   *
-   * A size of 0 is returned if the variable is an external variable and has an
-   * unknown dimension.
-   */
-  HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9,
-  /**
-   * Indicates whether the variable is constant. The value of this attribute is
-   * undefined if the symbol is not a variable. The type of this attribute is
-   * bool.
-   */
-  HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
-  /**
-   * Size of kernarg segment memory that is required to hold the values of the
-   * kernel arguments, in bytes. Must be a multiple of 16. The value of this
-   * attribute is undefined if the symbol is not a kernel. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
-  /**
-   * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
-   * which is the maximum of 16 and the maximum alignment of any of the kernel
-   * arguments. The value of this attribute is undefined if the symbol is not a
-   * kernel. The type of this attribute is uint32_t.
-   */
-  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
-  /**
-   * Size of static group segment memory required by the kernel (per
-   * work-group), in bytes. The value of this attribute is undefined
-   * if the symbol is not a kernel. The type of this attribute is uint32_t.
-   *
-   * The reported amount does not include any dynamically allocated group
-   * segment memory that may be requested by the application when a kernel is
-   * dispatched.
-   */
-  HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
-  /**
-   * Size of static private, spill, and arg segment memory required by
-   * this kernel (per work-item), in bytes. The value of this attribute is
-   * undefined if the symbol is not a kernel. The type of this attribute is
-   * uint32_t.
-   *
-   * If the value of ::HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is true,
-   * the kernel may use more private memory than the reported value, and the
-   * application must add the dynamic call stack usage to @a
-   * private_segment_size when populating a kernel dispatch packet.
-   */
-  HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
-  /**
-   * Dynamic callstack flag. The value of this attribute is undefined if the
-   * symbol is not a kernel. The type of this attribute is bool.
-   *
-   * If this flag is set (the value is true), the kernel uses a dynamically
-   * sized call stack. This can happen if recursive calls, calls to indirect
-   * functions, or the HSAIL alloca instruction are present in the kernel.
-   */
-  HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
-  /**
-   * Call convention of the kernel. The value of this attribute is undefined if
-   * the symbol is not a kernel. The type of this attribute is uint32_t.
-   */
-  HSA_CODE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18,
-  /**
-   * Call convention of the indirect function. The value of this attribute is
-   * undefined if the symbol is not an indirect function. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
-} hsa_code_symbol_info_t;
-
-/**
- * @deprecated
- *
- * @brief Get the current value of an attribute for a given code symbol.
- *
- * @param[in] code_symbol Code symbol.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_SYMBOL The code symbol is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * code symbol attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_symbol_get_info(
-    hsa_code_symbol_t code_symbol,
-    hsa_code_symbol_info_t attribute,
-    void *value);
-
-/**
- * @deprecated
- *
- * @brief Iterate over the symbols in a code object, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] code_object Code object.
- *
- * @param[in] callback Callback to be invoked once per code object symbol. The
- * HSA runtime passes three arguments to the callback: the code object, a
- * symbol, and the application data.  If @p callback returns a status other than
- * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
- * ::hsa_code_object_iterate_symbols returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_iterate_symbols(
-    hsa_code_object_t code_object,
-    hsa_status_t (*callback)(hsa_code_object_t code_object,
-                             hsa_code_symbol_t symbol,
-                             void *data),
-    void *data);
-
-/** @} */
-
-#ifdef __cplusplus
-}  // end extern "C" block
-#endif
-
-#endif  // header guard
diff --git a/third_party/rocm/include/hsa/hsa_api_trace.h b/third_party/rocm/include/hsa/hsa_api_trace.h
deleted file mode 100644
index 5c33f07..0000000
--- a/third_party/rocm/include/hsa/hsa_api_trace.h
+++ /dev/null
@@ -1,474 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-//                 AMD Research and AMD HSA Software Development
-//
-//                 Advanced Micro Devices, Inc.
-//
-//                 www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
-#define HSA_RUNTIME_INC_HSA_API_TRACE_H
-
-#include "hsa.h"
-#ifdef AMD_INTERNAL_BUILD
-#include "hsa_ext_image.h"
-#include "hsa_ext_amd.h"
-#include "hsa_ext_finalize.h"
-#else
-#include "inc/hsa_ext_image.h"
-#include "inc/hsa_ext_amd.h"
-#include "inc/hsa_ext_finalize.h"
-#endif
-
-#include <string.h>
-#include <assert.h>
-#include <stddef.h>
-
-// Major Ids of the Api tables exported by Hsa Core Runtime
-#define HSA_API_TABLE_MAJOR_VERSION               0x01
-#define HSA_CORE_API_TABLE_MAJOR_VERSION          0x01
-#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION       0x01
-#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION     0x01
-#define HSA_IMAGE_API_TABLE_MAJOR_VERSION         0x01
-#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION    0x01
-
-// Step Ids of the Api tables exported by Hsa Core Runtime
-#define HSA_API_TABLE_STEP_VERSION                0x00
-#define HSA_CORE_API_TABLE_STEP_VERSION           0x00
-#define HSA_AMD_EXT_API_TABLE_STEP_VERSION        0x00
-#define HSA_FINALIZER_API_TABLE_STEP_VERSION      0x00
-#define HSA_IMAGE_API_TABLE_STEP_VERSION          0x00
-#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION     0x00
-
-// Min function used to copy Api Tables
-static inline uint32_t Min(const uint32_t a, const uint32_t b) {
-  return (a > b) ? b : a;
-}
-
-// Declarations of APIs intended for use only by tools.
-typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* pkts, uint64_t pkt_count);
-typedef void (*hsa_amd_queue_intercept_handler)(const void* pkts, uint64_t pkt_count,
-                                                uint64_t user_pkt_index, void* data,
-                                                hsa_amd_queue_intercept_packet_writer writer);
-hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
-                                              hsa_amd_queue_intercept_handler callback,
-                                              void* user_data);
-hsa_status_t hsa_amd_queue_intercept_create(
-    hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
-    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
-    uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
-
-typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent,
-                                               void* data);
-hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
-                                                   void* user_data);
-
-// Structure of Version used to identify an instance of Api table
-// Must be the first member (offsetof == 0) of all API tables.
-// This is the root of the table passing ABI.
-struct ApiTableVersion {
-  uint32_t major_id;
-  uint32_t minor_id;
-  uint32_t step_id;
-  uint32_t reserved;
-};
-
-// Table to export HSA Finalizer Extension Apis
-struct FinalizerExtTable {
-  ApiTableVersion version;
-	decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
-	decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
-	decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
-	decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
-	decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
-	decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
-};
-
-// Table to export HSA Image Extension Apis
-struct ImageExtTable {
-  ApiTableVersion version;
-	decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
-	decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
-	decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
-	decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
-	decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
-	decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
-	decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
-	decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
-	decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
-	decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
-  decltype(hsa_ext_image_get_capability_with_layout)* hsa_ext_image_get_capability_with_layout_fn;
-  decltype(hsa_ext_image_data_get_info_with_layout)* hsa_ext_image_data_get_info_with_layout_fn;
-  decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
-};
-
-// Table to export AMD Extension Apis
-struct AmdExtTable {
-  ApiTableVersion version;
-	decltype(hsa_amd_coherency_get_type)* hsa_amd_coherency_get_type_fn;
-	decltype(hsa_amd_coherency_set_type)* hsa_amd_coherency_set_type_fn;
-  decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled_fn;
-  decltype(hsa_amd_profiling_async_copy_enable) *hsa_amd_profiling_async_copy_enable_fn;
-  decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time_fn;
-  decltype(hsa_amd_profiling_get_async_copy_time) *hsa_amd_profiling_get_async_copy_time_fn;
-  decltype(hsa_amd_profiling_convert_tick_to_system_domain)* hsa_amd_profiling_convert_tick_to_system_domain_fn;
-  decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler_fn;
-  decltype(hsa_amd_async_function)* hsa_amd_async_function_fn;
-  decltype(hsa_amd_signal_wait_any)* hsa_amd_signal_wait_any_fn;
-  decltype(hsa_amd_queue_cu_set_mask)* hsa_amd_queue_cu_set_mask_fn;
-  decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info_fn;
-  decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools_fn;
-  decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate_fn;
-  decltype(hsa_amd_memory_pool_free)* hsa_amd_memory_pool_free_fn;
-  decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
-  decltype(hsa_amd_agent_memory_pool_get_info)* hsa_amd_agent_memory_pool_get_info_fn;
-  decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access_fn;
-  decltype(hsa_amd_memory_pool_can_migrate)* hsa_amd_memory_pool_can_migrate_fn;
-  decltype(hsa_amd_memory_migrate)* hsa_amd_memory_migrate_fn;
-  decltype(hsa_amd_memory_lock)* hsa_amd_memory_lock_fn;
-  decltype(hsa_amd_memory_unlock)* hsa_amd_memory_unlock_fn;
-  decltype(hsa_amd_memory_fill)* hsa_amd_memory_fill_fn;
-  decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
-  decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
-  decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
-  decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
-  decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
-  decltype(hsa_amd_ipc_memory_create)* hsa_amd_ipc_memory_create_fn;
-  decltype(hsa_amd_ipc_memory_attach)* hsa_amd_ipc_memory_attach_fn;
-  decltype(hsa_amd_ipc_memory_detach)* hsa_amd_ipc_memory_detach_fn;
-  decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn;
-  decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn;
-  decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn;
-  decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn;
-  decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
-  decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
-  decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
-  decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
-  decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
-  decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
-  decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
-  decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
-};
-
-// Table to export HSA Core Runtime Apis
-struct CoreApiTable {
-  ApiTableVersion version;
-  decltype(hsa_init)* hsa_init_fn;
-  decltype(hsa_shut_down)* hsa_shut_down_fn;
-  decltype(hsa_system_get_info)* hsa_system_get_info_fn;
-  decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
-  decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
-  decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
-  decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;
-  decltype(hsa_queue_create)* hsa_queue_create_fn;
-  decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
-  decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
-  decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
-  decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
-  decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
-  decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
-  decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
-  decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
-  decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
-  decltype(hsa_queue_cas_write_index_scacq_screl)* hsa_queue_cas_write_index_scacq_screl_fn;
-  decltype(hsa_queue_cas_write_index_scacquire)* hsa_queue_cas_write_index_scacquire_fn;
-  decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
-  decltype(hsa_queue_cas_write_index_screlease)* hsa_queue_cas_write_index_screlease_fn;
-  decltype(hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl_fn;
-  decltype(hsa_queue_add_write_index_scacquire)* hsa_queue_add_write_index_scacquire_fn;
-  decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
-  decltype(hsa_queue_add_write_index_screlease)* hsa_queue_add_write_index_screlease_fn;
-  decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
-  decltype(hsa_queue_store_read_index_screlease)* hsa_queue_store_read_index_screlease_fn;
-  decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
-  decltype(hsa_region_get_info)* hsa_region_get_info_fn;
-  decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
-  decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
-  decltype(hsa_memory_register)* hsa_memory_register_fn;
-  decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
-  decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
-  decltype(hsa_memory_free)* hsa_memory_free_fn;
-  decltype(hsa_memory_copy)* hsa_memory_copy_fn;
-  decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
-  decltype(hsa_signal_create)* hsa_signal_create_fn;
-  decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
-  decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
-  decltype(hsa_signal_load_scacquire)* hsa_signal_load_scacquire_fn;
-  decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
-  decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease_fn;
-  decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
-  decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire_fn;
-  decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
-  decltype(hsa_signal_and_scacquire)* hsa_signal_and_scacquire_fn;
-  decltype(hsa_signal_and_screlease)* hsa_signal_and_screlease_fn;
-  decltype(hsa_signal_and_scacq_screl)* hsa_signal_and_scacq_screl_fn;
-  decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
-  decltype(hsa_signal_or_scacquire)* hsa_signal_or_scacquire_fn;
-  decltype(hsa_signal_or_screlease)* hsa_signal_or_screlease_fn;
-  decltype(hsa_signal_or_scacq_screl)* hsa_signal_or_scacq_screl_fn;
-  decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
-  decltype(hsa_signal_xor_scacquire)* hsa_signal_xor_scacquire_fn;
-  decltype(hsa_signal_xor_screlease)* hsa_signal_xor_screlease_fn;
-  decltype(hsa_signal_xor_scacq_screl)* hsa_signal_xor_scacq_screl_fn;
-  decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
-  decltype(hsa_signal_exchange_scacquire)* hsa_signal_exchange_scacquire_fn;
-  decltype(hsa_signal_exchange_screlease)* hsa_signal_exchange_screlease_fn;
-  decltype(hsa_signal_exchange_scacq_screl)* hsa_signal_exchange_scacq_screl_fn;
-  decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
-  decltype(hsa_signal_add_scacquire)* hsa_signal_add_scacquire_fn;
-  decltype(hsa_signal_add_screlease)* hsa_signal_add_screlease_fn;
-  decltype(hsa_signal_add_scacq_screl)* hsa_signal_add_scacq_screl_fn;
-  decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
-  decltype(hsa_signal_subtract_scacquire)* hsa_signal_subtract_scacquire_fn;
-  decltype(hsa_signal_subtract_screlease)* hsa_signal_subtract_screlease_fn;
-  decltype(hsa_signal_subtract_scacq_screl)* hsa_signal_subtract_scacq_screl_fn;
-  decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
-  decltype(hsa_signal_cas_scacquire)* hsa_signal_cas_scacquire_fn;
-  decltype(hsa_signal_cas_screlease)* hsa_signal_cas_screlease_fn;
-  decltype(hsa_signal_cas_scacq_screl)* hsa_signal_cas_scacq_screl_fn;
-
-  //===--- Instruction Set Architecture -----------------------------------===//
-
-  decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;
-
-  //===--- Code Objects (deprecated) --------------------------------------===//
-
-  // Deprecated since v1.1.
-  decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;
-
-  //===--- Executable -----------------------------------------------------===//
-
-  // Deprecated since v1.1.
-  decltype(hsa_executable_create)* hsa_executable_create_fn;
-  decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
-  decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
-  decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
-  decltype(hsa_executable_global_variable_define)*
-      hsa_executable_global_variable_define_fn;
-  decltype(hsa_executable_agent_global_variable_define)*
-      hsa_executable_agent_global_variable_define_fn;
-  decltype(hsa_executable_readonly_variable_define)*
-      hsa_executable_readonly_variable_define_fn;
-  decltype(hsa_executable_validate)* hsa_executable_validate_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
-  decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
-  // Deprecated since v1.1.
-  decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;
-
-  //===--- Runtime Notifications ------------------------------------------===//
-
-  decltype(hsa_status_string)* hsa_status_string_fn;
-
-  // Start HSA v1.1 additions
-  decltype(hsa_extension_get_name)* hsa_extension_get_name_fn;
-  decltype(hsa_system_major_extension_supported)* hsa_system_major_extension_supported_fn;
-  decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table_fn;
-  decltype(hsa_agent_major_extension_supported)* hsa_agent_major_extension_supported_fn;
-  decltype(hsa_cache_get_info)* hsa_cache_get_info_fn;
-  decltype(hsa_agent_iterate_caches)* hsa_agent_iterate_caches_fn;
-  decltype(hsa_signal_silent_store_relaxed)* hsa_signal_silent_store_relaxed_fn;
-  decltype(hsa_signal_silent_store_screlease)* hsa_signal_silent_store_screlease_fn;
-  decltype(hsa_signal_group_create)* hsa_signal_group_create_fn;
-  decltype(hsa_signal_group_destroy)* hsa_signal_group_destroy_fn;
-  decltype(hsa_signal_group_wait_any_scacquire)* hsa_signal_group_wait_any_scacquire_fn;
-  decltype(hsa_signal_group_wait_any_relaxed)* hsa_signal_group_wait_any_relaxed_fn;
-
-  //===--- Instruction Set Architecture - HSA v1.1 additions --------------===//
-
-  decltype(hsa_agent_iterate_isas)* hsa_agent_iterate_isas_fn;
-  decltype(hsa_isa_get_info_alt)* hsa_isa_get_info_alt_fn;
-  decltype(hsa_isa_get_exception_policies)* hsa_isa_get_exception_policies_fn;
-  decltype(hsa_isa_get_round_method)* hsa_isa_get_round_method_fn;
-  decltype(hsa_wavefront_get_info)* hsa_wavefront_get_info_fn;
-  decltype(hsa_isa_iterate_wavefronts)* hsa_isa_iterate_wavefronts_fn;
-
-  //===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//
-
-  // Deprecated since v1.1.
-  decltype(hsa_code_object_get_symbol_from_name)*
-      hsa_code_object_get_symbol_from_name_fn;
-
-  //===--- Executable - HSA v1.1 additions --------------------------------===//
-
-  decltype(hsa_code_object_reader_create_from_file)*
-      hsa_code_object_reader_create_from_file_fn;
-  decltype(hsa_code_object_reader_create_from_memory)*
-      hsa_code_object_reader_create_from_memory_fn;
-  decltype(hsa_code_object_reader_destroy)* hsa_code_object_reader_destroy_fn;
-  decltype(hsa_executable_create_alt)* hsa_executable_create_alt_fn;
-  decltype(hsa_executable_load_program_code_object)*
-      hsa_executable_load_program_code_object_fn;
-  decltype(hsa_executable_load_agent_code_object)*
-      hsa_executable_load_agent_code_object_fn;
-  decltype(hsa_executable_validate_alt)* hsa_executable_validate_alt_fn;
-  decltype(hsa_executable_get_symbol_by_name)*
-      hsa_executable_get_symbol_by_name_fn;
-  decltype(hsa_executable_iterate_agent_symbols)*
-      hsa_executable_iterate_agent_symbols_fn;
-  decltype(hsa_executable_iterate_program_symbols)*
-      hsa_executable_iterate_program_symbols_fn;
-};
-
-// Table to export HSA Apis from Core Runtime, Amd Extensions
-// Finalizer and Images
-struct HsaApiTable {
-
-  // Version of Hsa Api Table
-  ApiTableVersion version;
-
-  // Table of function pointers to HSA Core Runtime
-	CoreApiTable* core_;
-
-  // Table of function pointers to AMD extensions
-	AmdExtTable* amd_ext_;
-
-  // Table of function pointers to HSA Finalizer Extension
-	FinalizerExtTable* finalizer_ext_;
-
-  // Table of function pointers to HSA Image Extension
-	ImageExtTable* image_ext_;
-};
-
-// Structure containing instances of different api tables
-struct HsaApiTableContainer {
-  HsaApiTable root;
-	CoreApiTable core;
-	AmdExtTable amd_ext;
-	FinalizerExtTable finalizer_ext;
-	ImageExtTable image_ext;
-
-  // Default initialization of a container instance
-  HsaApiTableContainer() {
-    root.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
-    root.version.minor_id = sizeof(HsaApiTable);
-    root.version.step_id = HSA_API_TABLE_STEP_VERSION;
-
-    core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
-    core.version.minor_id = sizeof(CoreApiTable);
-    core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;
-    root.core_ = &core;
-
-    amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
-    amd_ext.version.minor_id = sizeof(AmdExtTable);
-    amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;
-    root.amd_ext_ = &amd_ext;
-
-    finalizer_ext.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
-    finalizer_ext.version.minor_id = sizeof(FinalizerExtTable);
-    finalizer_ext.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
-    root.finalizer_ext_ = & finalizer_ext;
-
-    image_ext.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
-    image_ext.version.minor_id = sizeof(ImageExtTable);
-    image_ext.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;
-    root.image_ext_ = &image_ext;
-  }
-};
-
-// Api to copy function pointers of a table
-static
-void inline copyApi(void* src, void* dest, size_t size) {
-  assert(size >= sizeof(ApiTableVersion));
-  memcpy((char*)src + sizeof(ApiTableVersion),
-         (char*)dest + sizeof(ApiTableVersion),
-         (size - sizeof(ApiTableVersion)));
-}
-
-// Copy Api child tables if valid.
-static void inline copyElement(ApiTableVersion* dest, ApiTableVersion* src) {
-  if (src->major_id && (dest->major_id == src->major_id)) {
-    dest->step_id = src->step_id;
-    dest->minor_id = Min(dest->minor_id, src->minor_id);
-    copyApi(dest, src, dest->minor_id);
-  } else {
-    dest->major_id = 0;
-    dest->minor_id = 0;
-    dest->step_id = 0;
-  }
-}
-
-// Copy constructor for all Api tables. The function assumes the
-// user has initialized an instance of tables container correctly
-// for the Major, Minor and Stepping Ids of Root and Child Api tables.
-// The function will overwrite the value of Minor Id by taking the
-// minimum of source and destination parameters. It will also overwrite
-// the stepping Id with value from source parameter.
-static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
-  // Verify Major Id of source and destination tables match
-  if (dest->version.major_id != src->version.major_id) {
-    dest->version.major_id = 0;
-    dest->version.minor_id = 0;
-    dest->version.step_id = 0;
-    return;
-  }
-
-  // Initialize the stepping id and minor id of root table. For the
-  // minor id which encodes struct size, take the minimum of source
-  // and destination parameters
-  dest->version.step_id = src->version.step_id;
-  dest->version.minor_id = Min(dest->version.minor_id, src->version.minor_id);
-
-  // Copy child tables if present
-  if ((offsetof(HsaApiTable, core_) < dest->version.minor_id))
-    copyElement(&dest->core_->version, &src->core_->version);
-  if ((offsetof(HsaApiTable, amd_ext_) < dest->version.minor_id))
-    copyElement(&dest->amd_ext_->version, &src->amd_ext_->version);
-  if ((offsetof(HsaApiTable, finalizer_ext_) < dest->version.minor_id))
-    copyElement(&dest->finalizer_ext_->version, &src->finalizer_ext_->version);
-  if ((offsetof(HsaApiTable, image_ext_) < dest->version.minor_id))
-    copyElement(&dest->image_ext_->version, &src->image_ext_->version);
-}
-#endif
diff --git a/third_party/rocm/include/hsa/hsa_ext_amd.h b/third_party/rocm/include/hsa/hsa_ext_amd.h
deleted file mode 100644
index 04a6e4d..0000000
--- a/third_party/rocm/include/hsa/hsa_ext_amd.h
+++ /dev/null
@@ -1,1983 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-//                 AMD Research and AMD HSA Software Development
-//
-//                 Advanced Micro Devices, Inc.
-//
-//                 www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// HSA AMD extension.
-
-#ifndef HSA_RUNTIME_EXT_AMD_H_
-#define HSA_RUNTIME_EXT_AMD_H_
-
-#include "hsa.h"
-#include "hsa_ext_image.h"
-
-#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
-#define HSA_AMD_INTERFACE_VERSION_MINOR 0
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @brief Enumeration constants added to ::hsa_status_t.
- *
- * @remark Additions to hsa_status_t
- */
-enum {
-  /**
-   * The memory pool is invalid.
-   */
-  HSA_STATUS_ERROR_INVALID_MEMORY_POOL = 40,
-
-  /**
-   * Agent accessed memory beyond the maximum legal address.
-   */
-  HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION = 41,
-
-  /**
-   * Agent executed an invalid shader instruction.
-   */
-  HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION = 42,
-};
-
-/**
- * @brief Agent attributes.
- */
-typedef enum hsa_amd_agent_info_s {
-  /**
-   * Chip identifier. The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_CHIP_ID = 0xA000,
-  /**
-   * Size of a cacheline in bytes. The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001,
-  /**
-   * The number of compute unit available in the agent. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002,
-  /**
-   * The maximum clock frequency of the agent in MHz. The type of this
-   * attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003,
-  /**
-   * Internal driver node identifier. The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_DRIVER_NODE_ID = 0xA004,
-  /**
-   * Max number of watch points on memory address ranges to generate exception
-   * events when the watched addresses are accessed.  The type of this
-   * attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS = 0xA005,
-  /**
-   * Agent BDF_ID, named LocationID in thunk. The type of this attribute is
-   * uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_BDFID = 0xA006,
-  /**
-   * Memory Interface width, the return value type is uint32_t.
-   * This attribute is deprecated.
-   */
-  HSA_AMD_AGENT_INFO_MEMORY_WIDTH = 0xA007,
-  /**
-   * Max Memory Clock, the return value type is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY = 0xA008,
-  /**
-   * Board name of Agent - populated from MarketingName of Kfd Node
-   * The value is an Ascii string of 64 chars.
-   */
-  HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009,
-  /**
-   * Maximum number of waves possible in a Compute Unit.
-   * The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
-  /**
-   * Number of SIMD's per compute unit CU
-   * The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
-  /**
-   * Number of Shader Engines (SE) in Gpu
-   * The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES = 0xA00C,
-  /**
-   * Number of Shader Arrays Per Shader Engines in Gpu
-   * The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE = 0xA00D,
-  /**
-   * Address of the HDP flush registers.  Use of these registers does not conform to the HSA memory
-   * model and should be treated with caution.
-   * The type of this attribute is hsa_amd_hdp_flush_t.
-   */
-  HSA_AMD_AGENT_INFO_HDP_FLUSH = 0xA00E,
-  /**
-   * PCIe domain for the agent.  Pairs with HSA_AMD_AGENT_INFO_BDFID
-   * to give the full physical location of the Agent.
-   * The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_DOMAIN = 0xA00F,
-  /**
-   * Queries for support of cooperative queues.  See ::HSA_QUEUE_TYPE_COOPERATIVE.
-   * The type of this attribute is bool.
-   */
-  HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010,
-  /**
-   * Queries UUID of an agent. The value is an Ascii string with a maximum
-   * of 21 chars including NUL. The string value consists of two parts: header
-   * and body. The header identifies device type (GPU, CPU, DSP) while body
-   * encodes UUID as a 16 digit hex string
-   *
-   * Agents that do not support UUID will return the string "GPU-XX" or
-   * "CPU-XX" or "DSP-XX" depending upon their device type ::hsa_device_type_t
-   */
-  HSA_AMD_AGENT_INFO_UUID = 0xA011,
-  /**
-   * Queries for the ASIC revision of an agent. The value is an integer that
-   * increments for each revision. This can be used by user-level software to
-   * change how it operates, depending on the hardware version. This allows
-   * selective workarounds for hardware errata.
-   * The type of this attribute is uint32_t.
-   */
-  HSA_AMD_AGENT_INFO_ASIC_REVISION = 0xA012
-} hsa_amd_agent_info_t;
-
-typedef struct hsa_amd_hdp_flush_s {
-  uint32_t* HDP_MEM_FLUSH_CNTL;
-  uint32_t* HDP_REG_FLUSH_CNTL;
-} hsa_amd_hdp_flush_t;
-
-/**
- * @brief Region attributes.
- */
-typedef enum hsa_amd_region_info_s {
-  /**
-   * Determine if host can access the region. The type of this attribute
-   * is bool.
-   */
-  HSA_AMD_REGION_INFO_HOST_ACCESSIBLE = 0xA000,
-  /**
-   * Base address of the region in flat address space.
-   */
-  HSA_AMD_REGION_INFO_BASE = 0xA001,
-  /**
-   * Memory Interface width, the return value type is uint32_t.
-   * This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_WIDTH.
-   */
-  HSA_AMD_REGION_INFO_BUS_WIDTH = 0xA002,
-  /**
-   * Max Memory Clock, the return value type is uint32_t.
-   * This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY.
-   */
-  HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY = 0xA003
-} hsa_amd_region_info_t;
-
-/**
- * @brief Coherency attributes of fine grain region.
- */
-typedef enum hsa_amd_coherency_type_s {
-  /**
-   * Coherent region.
-   */
-  HSA_AMD_COHERENCY_TYPE_COHERENT = 0,
-  /**
-   * Non coherent region.
-   */
-  HSA_AMD_COHERENCY_TYPE_NONCOHERENT = 1
-} hsa_amd_coherency_type_t;
-
-/**
- * @brief Get the coherency type of the fine grain region of an agent.
- *
- * @param[in] agent A valid agent.
- *
- * @param[out] type Pointer to a memory location where the HSA runtime will
- * store the coherency type of the fine grain region.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is NULL.
- */
-hsa_status_t HSA_API hsa_amd_coherency_get_type(hsa_agent_t agent,
-                                                hsa_amd_coherency_type_t* type);
-
-/**
- * @brief Set the coherency type of the fine grain region of an agent.
- * Deprecated.  This is supported on KV platforms.  For backward compatibility
- * other platforms will spuriously succeed.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] type The coherency type to be set.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is invalid.
- */
-hsa_status_t HSA_API hsa_amd_coherency_set_type(hsa_agent_t agent,
-                                                hsa_amd_coherency_type_t type);
-
-/**
- * @brief Structure containing profiling dispatch time information.
- *
- * Times are reported as ticks in the domain of the HSA system clock.
- * The HSA system clock tick and frequency is obtained via hsa_system_get_info.
- */
-typedef struct hsa_amd_profiling_dispatch_time_s {
-  /**
-   * Dispatch packet processing start time.
-   */
-  uint64_t start;
-  /**
-   * Dispatch packet completion time.
-   */
-  uint64_t end;
-} hsa_amd_profiling_dispatch_time_t;
-
-/**
- * @brief Structure containing profiling async copy time information.
- *
- * Times are reported as ticks in the domain of the HSA system clock.
- * The HSA system clock tick and frequency is obtained via hsa_system_get_info.
- */
-typedef struct hsa_amd_profiling_async_copy_time_s {
-  /**
-   * Async copy processing start time.
-   */
-  uint64_t start;
-  /**
-   * Async copy completion time.
-   */
-  uint64_t end;
-} hsa_amd_profiling_async_copy_time_t;
-
-/**
- * @brief Enable or disable profiling capability of a queue.
- *
- * @param[in] queue A valid queue.
- *
- * @param[in] enable 1 to enable profiling. 0 to disable profiling.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
- */
-hsa_status_t HSA_API
-    hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable);
-
-/**
- * @brief Enable or disable asynchronous memory copy profiling.
- *
- * @details The runtime will provide the copy processing start timestamp and
- * completion timestamp of each call to hsa_amd_memory_async_copy if the
- * async copy profiling is enabled prior to the call to
- * hsa_amd_memory_async_copy. The completion signal object is used to
- * hold the last async copy start and end timestamp. The client can retrieve
- * these timestamps via call to hsa_amd_profiling_get_async_copy_time.
- *
- * @param[in] enable True to enable profiling. False to disable profiling.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed on allocating resources
- * needed to profile the asynchronous copy.
- */
-hsa_status_t HSA_API
-    hsa_amd_profiling_async_copy_enable(bool enable);
-
-/**
- * @brief Retrieve packet processing time stamps.
- *
- * @param[in] agent The agent with which the signal was last used.  For
- * instance, if the profiled dispatch packet is dispatched onto queue Q,
- * which was created on agent A, then this parameter must be A.
- *
- * @param[in] signal A signal used as the completion signal of the dispatch
- * packet to retrieve time stamps from.  This dispatch packet must have been
- * issued to a queue with profiling enabled and have already completed.  Also
- * the signal must not have yet been used in any other packet following the
- * completion of the profiled dispatch packet.
- *
- * @param[out] time Packet processing timestamps in the HSA system clock
- * domain.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL The signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p time is NULL.
- */
-hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time(
-    hsa_agent_t agent, hsa_signal_t signal,
-    hsa_amd_profiling_dispatch_time_t* time);
-
-/**
- * @brief Retrieve asynchronous copy timestamps.
- *
- * @details Async copy profiling is enabled via call to
- * hsa_amd_profiling_async_copy_enable.
- *
- * @param[in] signal A signal used as the completion signal of the call to
- * hsa_amd_memory_async_copy.
- *
- * @param[out] time Async copy processing timestamps in the HSA system clock
- * domain.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL The signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p time is NULL.
- */
-hsa_status_t HSA_API hsa_amd_profiling_get_async_copy_time(
-    hsa_signal_t signal, hsa_amd_profiling_async_copy_time_t* time);
-
-/**
- * @brief Computes the frequency ratio and offset between the agent clock and
- * HSA system clock and converts the agent's tick to HSA system domain tick.
- *
- * @param[in] agent The agent used to retrieve the agent_tick. It is user's
- * responsibility to make sure the tick number is from this agent, otherwise,
- * the behavior is undefined.
- *
- * @param[in] agent_tick The tick count retrieved from the specified @p agent.
- *
- * @param[out] system_tick The translated HSA system domain clock counter tick.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p system_tick is NULL;
- */
-hsa_status_t HSA_API
-    hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent,
-                                                    uint64_t agent_tick,
-                                                    uint64_t* system_tick);
-
-/**
- * @brief Signal attribute flags.
- */
-typedef enum {
-  /**
-   * Signal will only be consumed by AMD GPUs.  Limits signal consumption to
-   * AMD GPU agents only.  Ignored if @p num_consumers is not zero (all agents).
-   */
-  HSA_AMD_SIGNAL_AMD_GPU_ONLY = 1,
-  /**
-   * Signal may be used for interprocess communication.
-   * IPC signals can be read, written, and waited on from any process.
-   * Profiling using an IPC enabled signal is only supported in a single process
-   * at a time.  Producing profiling data in one process and consuming it in
-   * another process is undefined.
-   */
-  HSA_AMD_SIGNAL_IPC = 2,
-} hsa_amd_signal_attribute_t;
-
-/**
- * @brief Create a signal with specific attributes.
- *
- * @param[in] initial_value Initial value of the signal.
- *
- * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
- * any agent might wait on the signal.
- *
- * @param[in] consumers List of agents that might consume (wait on) the
- * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
- * HSA runtime might use the list to optimize the handling of the signal
- * object. If an agent not listed in @p consumers waits on the returned
- * signal, the behavior is undefined. The memory associated with @p consumers
- * can be reused or freed after the function returns.
- *
- * @param[in] attributes Requested signal attributes.  Multiple signal attributes
- * may be requested by combining them with bitwise OR.  Requesting no attributes
- * (@p attributes == 0) results in the same signal as would have been obtained
- * via hsa_signal_create.
- *
- * @param[out] signal Pointer to a memory location where the HSA runtime will
- * store the newly created signal handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
- * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
- * contains duplicates.
- */
-hsa_status_t HSA_API hsa_amd_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
-                                           const hsa_agent_t* consumers, uint64_t attributes,
-                                           hsa_signal_t* signal);
-
-/**
- * @brief Asyncronous signal handler function type.
- *
- * @details Type definition of callback function to be used with
- * hsa_amd_signal_async_handler. This callback is invoked if the associated
- * signal and condition are met. The callback receives the value of the signal
- * which satisfied the associated wait condition and a user provided value. If
- * the callback returns true then the callback will be called again if the
- * associated signal and condition are satisfied again. If the callback returns
- * false then it will not be called again.
- *
- * @param[in] value Contains the value of the signal observed by
- * hsa_amd_signal_async_handler which caused the signal handler to be invoked.
- *
- * @param[in] arg Contains the user provided value given when the signal handler
- * was registered with hsa_amd_signal_async_handler
- *
- * @retval true resumes monitoring the signal with this handler (as if calling
- * hsa_amd_signal_async_handler again with identical parameters)
- *
- * @retval false stops monitoring the signal with this handler (handler will
- * not be called again for this signal)
- *
- */
-typedef bool (*hsa_amd_signal_handler)(hsa_signal_value_t value, void* arg);
-
-/**
- * @brief Register asynchronous signal handler function.
- *
- * @details Allows registering a callback function and user provided value with
- * a signal and wait condition. The callback will be invoked if the associated
- * signal and wait condition are satisfied. Callbacks will be invoked serially
- * but in an arbitrary order so callbacks should be independent of each other.
- * After being invoked a callback may continue to wait for its associated signal
- * and condition and, possibly, be invoked again. Or the callback may stop
- * waiting. If the callback returns true then it will continue waiting and may
- * be called again. If false then the callback will not wait again and will not
- * be called again for the associated signal and condition. It is possible to
- * register the same callback multiple times with the same or different signals
- * and/or conditions. Each registration of the callback will be treated entirely
- * independently.
- *
- * @param[in] signal hsa signal to be asynchronously monitored
- *
- * @param[in] cond condition value to monitor for
- *
- * @param[in] value signal value used in condition expression
- *
- * @param[in] handler asynchronous signal handler invoked when signal's
- * condition is met
- *
- * @param[in] arg user provided value which is provided to handler when handler
- * is invoked
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL signal is not a valid hsa_signal_t
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT handler is invalid (NULL)
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of
- * resources or blocking signals are not supported by the HSA driver component.
- *
- */
-hsa_status_t HSA_API
-    hsa_amd_signal_async_handler(hsa_signal_t signal,
-                                 hsa_signal_condition_t cond,
-                                 hsa_signal_value_t value,
-                                 hsa_amd_signal_handler handler, void* arg);
-
-/**
- * @brief Call a function asynchronously
- *
- * @details Provides access to the runtime's asynchronous event handling thread
- * for general asynchronous functions.  Functions queued this way are executed
- * in the same manner as if they were a signal handler who's signal is
- * satisfied.
- *
- * @param[in] callback asynchronous function to be invoked
- *
- * @param[in] arg user provided value which is provided to handler when handler
- * is invoked
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT handler is invalid (NULL)
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of
- * resources or blocking signals are not supported by the HSA driver component.
- *
- */
-hsa_status_t HSA_API
-    hsa_amd_async_function(void (*callback)(void* arg), void* arg);
-
-/**
- * @brief Wait for any signal-condition pair to be satisfied.
- *
- * @details Allows waiting for any of several signal and conditions pairs to be
- * satisfied. The function returns the index into the list of signals of the
- * first satisfying signal-condition pair. The value of the satisfying signal's
- * value is returned in satisfying_value unless satisfying_value is NULL. This
- * function provides only relaxed memory semantics.
- */
-uint32_t HSA_API
-    hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals,
-                            hsa_signal_condition_t* conds,
-                            hsa_signal_value_t* values, uint64_t timeout_hint,
-                            hsa_wait_state_t wait_hint,
-                            hsa_signal_value_t* satisfying_value);
-
-/**
- * @brief Query image limits.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] attribute HSA image info attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p value is NULL or @p attribute <
- * HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS or @p attribute >
- * HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS.
- *
- */
-hsa_status_t HSA_API hsa_amd_image_get_info_max_dim(hsa_agent_t agent,
-                                                    hsa_agent_info_t attribute,
-                                                    void* value);
-
-/**
- * @brief Set a CU affinity to specific queues within the process, this function
- * call is "atomic".
- *
- * @param[in] queue A pointer to HSA queue.
- *
- * @param[in] num_cu_mask_count Size of CUMask bit array passed in.
- *
- * @param[in] cu_mask Bit-vector representing the CU mask.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p queue is NULL or invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_cu_mask_count is not
- * multiple of 32 or @p cu_mask is NULL.
- *
- * @retval ::HSA_STATUS_ERROR failed to call thunk api
- *
- */
-hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue,
-                                               uint32_t num_cu_mask_count,
-                                               const uint32_t* cu_mask);
-
-/**
- * @brief Memory segments associated with a memory pool.
- */
-typedef enum {
-  /**
-   * Global segment. Used to hold data that is shared by all agents.
-   */
-  HSA_AMD_SEGMENT_GLOBAL = 0,
-  /**
-   * Read-only segment. Used to hold data that remains constant during the
-   * execution of a kernel.
-   */
-  HSA_AMD_SEGMENT_READONLY = 1,
-  /**
-   * Private segment. Used to hold data that is local to a single work-item.
-   */
-  HSA_AMD_SEGMENT_PRIVATE = 2,
-  /**
-   * Group segment. Used to hold data that is shared by the work-items of a
-   * work-group.
-   */
-  HSA_AMD_SEGMENT_GROUP = 3,
-} hsa_amd_segment_t;
-
-/**
- * @brief A memory pool encapsulates physical storage on an agent
- * along with a memory access model.
- *
- * @details A memory pool encapsulates a physical partition of an agent's
- * memory system along with a memory access model.  Division of a single
- * memory system into separate pools allows querying each partition's access
- * path properties (see ::hsa_amd_agent_memory_pool_get_info). Allocations
- * from a pool are preferentially bound to that pool's physical partition.
- * Binding to the pool's preferential physical partition may not be
- * possible or persistent depending on the system's memory policy
- * and/or state which is beyond the scope of HSA APIs.
- *
- * For example, a multi-node NUMA memory system may be represented by multiple
- * pool's with each pool providing size and access path information for the
- * partition it represents.  Allocations from a pool are preferentially bound
- * to the pool's partition (which in this example is a NUMA node) while
- * following its memory access model. The actual placement may vary or migrate
- * due to the system's NUMA policy and state, which is beyond the scope of
- * HSA APIs.
- */ 
-typedef struct hsa_amd_memory_pool_s {
-  /**
-   * Opaque handle.
-   */
-  uint64_t handle;
-} hsa_amd_memory_pool_t;
-
-typedef enum hsa_amd_memory_pool_global_flag_s {
-  /**
-   * The application can use allocations in the memory pool to store kernel
-   * arguments, and provide the values for the kernarg segment of
-   * a kernel dispatch.
-   */
-  HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT = 1,
-  /**
-   * Updates to memory in this pool conform to HSA memory consistency model.
-   * If this flag is set, then ::HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED
-   * must not be set.
-   */
-  HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED = 2,
-  /**
-   * Writes to memory in this pool can be performed by a single agent at a time.
-   */
-  HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED = 4
-} hsa_amd_memory_pool_global_flag_t;
-
-/**
- * @brief Memory pool features.
- */
-typedef enum {
-  /**
-  * Segment where the memory pool resides. The type of this attribute is
-  * ::hsa_amd_segment_t.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_SEGMENT = 0,
-  /**
-  * Flag mask. The value of this attribute is undefined if the value of
-  * ::HSA_AMD_MEMORY_POOL_INFO_SEGMENT is not ::HSA_AMD_SEGMENT_GLOBAL. The type
-  * of
-  * this attribute is uint32_t, a bit-field of
-  * ::hsa_amd_memory_pool_global_flag_t
-  * values.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS = 1,
-  /**
-  * Size of this pool, in bytes. The type of this attribute is size_t.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_SIZE = 2,
-  /**
-  * Indicates whether memory in this pool can be allocated using
-  * ::hsa_amd_memory_pool_allocate. The type of this attribute is bool.
-  *
-  * The value of this flag is always false for memory pools in the group and
-  * private segments.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED = 5,
-  /**
-  * Allocation granularity of buffers allocated by
-  * ::hsa_amd_memory_pool_allocate
-  * in this memory pool. The size of a buffer allocated in this pool is a
-  * multiple of the value of this attribute. The value of this attribute is
-  * only defined if ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for
-  * this pool. The type of this attribute is size_t.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
-  /**
-  * Alignment of buffers allocated by ::hsa_amd_memory_pool_allocate in this
-  * pool. The value of this attribute is only defined if
-  * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for this pool, and
-  * must be a power of 2. The type of this attribute is size_t.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
-  /**
-  * This memory_pool can be made directly accessible by all the agents in the
-  * system (::hsa_amd_agent_memory_pool_get_info does not return 
-  * ::HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED for any agent). The type of this
-  * attribute is bool.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
-  /**
-  * Maximum aggregate allocation size in bytes. The type of this attribute
-  * is size_t.
-  */
-  HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
-} hsa_amd_memory_pool_info_t;
-
-/**
- * @brief Get the current value of an attribute of a memory pool.
- *
- * @param[in] memory_pool A valid memory pool.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to a application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- */
-hsa_status_t HSA_API
-    hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool,
-                                 hsa_amd_memory_pool_info_t attribute,
-                                 void* value);
-
-/**
- * @brief Iterate over the memory pools associated with a given agent, and
- * invoke an application-defined callback on every iteration.
- *
- * @details An agent can directly access buffers located in some memory pool, or
- * be enabled to access them by the application (see ::hsa_amd_agents_allow_access),
- * yet that memory pool may not be returned by this function for that given
- * agent.
- *
- * A memory pool of fine-grained type must be associated only with the host.
- *
- * @param[in] agent A valid agent.
- *
- * @param[in] callback Callback to be invoked on the same thread that called
- * ::hsa_amd_agent_iterate_memory_pools, serially, once per memory pool that is
- * associated with the agent.  The HSA runtime passes two arguments to the
- * callback: the memory pool, and the application data.  If @p callback
- * returns a status other than ::HSA_STATUS_SUCCESS for a particular iteration,
- * the traversal stops and ::hsa_amd_agent_iterate_memory_pools returns that status
- * value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
-    hsa_agent_t agent,
-    hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data),
-    void* data);
-
-/**
- * @brief Allocate a block of memory (or buffer) in the specified pool.
- *
- * @param[in] memory_pool Memory pool where to allocate memory from. The memory
- * pool must have the ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED flag set.
- *
- * @param[in] size Allocation size, in bytes. Must not be zero. This value is
- * rounded up to the nearest multiple of
- * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE in @p memory_pool.
- *
- * @param[in] flags A bit-field that is used to specify allocation
- * directives. Reserved parameter, must be 0.
- *
- * @param[out] ptr Pointer to the location where to store the base virtual
- * address of
- * the allocated block. The returned base address is aligned to the value of
- * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT in @p memory_pool. If the
- * allocation fails, the returned value is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES No memory is available.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The memory pool is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
- * allocate memory in @p memory_pool, or @p size is greater than
- * the value of HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0,
- * or flags is not 0.
- *
- */
-hsa_status_t HSA_API
-    hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size,
-                                 uint32_t flags, void** ptr);
-
-/**
- * @brief Deallocate a block of memory previously allocated using
- * ::hsa_amd_memory_pool_allocate.
- *
- * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
- * previously returned by ::hsa_amd_memory_pool_allocate, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- */
-hsa_status_t HSA_API hsa_amd_memory_pool_free(void* ptr);
-
-/**
- * @brief Asynchronously copy a block of memory from the location pointed to by
- * @p src on the @p src_agent to the memory block pointed to by @p dst on the @p
- * dst_agent.
- * Because the DMA engines used may not be in the same coherency domain, the caller must ensure
- * that buffers are system-level coherent. In general this requires the sending device to have
- * released the buffer to system scope prior to executing the copy API and the receiving device
- * must execute a system scope acquire fence prior to use of the destination buffer.
- *
- * @param[out] dst Buffer where the content is to be copied.
- *
- * @param[in] dst_agent Agent associated with the @p dst. The agent must be able to directly
- * access both the source and destination buffers in their current locations.
- *
- * @param[in] src A valid pointer to the source of data to be copied. The source
- * buffer must not overlap with the destination buffer, otherwise the copy will succeed
- * but contents of @p dst is undefined.
- *
- * @param[in] src_agent Agent associated with the @p src. The agent must be able to directly
- * access both the source and destination buffers in their current locations.
- *
- * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
- * performed and the function returns success. Copying a number of bytes larger
- * than the size of the buffers pointed by @p dst or @p src results in undefined
- * behavior.
- *
- * @param[in] num_dep_signals Number of dependent signals. Can be 0.
- *
- * @param[in] dep_signals List of signals that must be waited on before the copy
- * operation starts. The copy will start after every signal has been observed with
- * the value 0. The dependent signal should not include completion signal from hsa_amd_memory_async_copy
- * operation to be issued in future as that can result in a deadlock. If @p num_dep_signals is 0, this
- * argument is ignored.
- *
- * @param[in] completion_signal Signal used to indicate completion of the copy
- * operation. When the copy operation is finished, the value of the signal is
- * decremented. The runtime indicates that an error has occurred during the copy
- * operation by setting the value of the completion signal to a negative
- * number. The signal handle must not be 0.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. The
- * application is responsible for checking for asynchronous error conditions
- * (see the description of @p completion_signal).
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p completion_signal is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
- * pointers are NULL, or the completion signal is 0.
- */
-hsa_status_t HSA_API
-    hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src,
-                              hsa_agent_t src_agent, size_t size,
-                              uint32_t num_dep_signals,
-                              const hsa_signal_t* dep_signals,
-                              hsa_signal_t completion_signal);
-
-/*
-[Provisional API]
-Pitched memory descriptor.
-All elements must be 4 byte aligned.  Pitch and slice are in bytes.
-*/
-typedef struct hsa_pitched_ptr_s {
-  void* base;
-  size_t pitch;
-  size_t slice;
-} hsa_pitched_ptr_t;
-
-/*
-[Provisional API]
-Copy direction flag.
-*/
-typedef enum {
-  hsaHostToHost = 0,
-  hsaHostToDevice = 1,
-  hsaDeviceToHost = 2,
-  hsaDeviceToDevice = 3
-} hsa_amd_copy_direction_t;
-
-/*
-[Provisional API]
-SDMA 3D memory copy API.  The same requirements must be met by src and dst as in
-hsa_amd_memory_async_copy.
-Both src and dst must be directly accessible to the copy_agent during the copy, src and dst rects
-must not overlap.
-CPU agents are not supported.  API requires SDMA and will return an error if SDMA is not available.
-Offsets and range carry x in bytes, y and z in rows and layers.
-*/
-hsa_status_t HSA_API hsa_amd_memory_async_copy_rect(
-    const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
-    const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
-    hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
-    hsa_signal_t completion_signal);
-
-/**
- * @brief Type of accesses to a memory pool from a given agent.
- */
-typedef enum {
-  /**
-  * The agent cannot directly access any buffer in the memory pool.
-  */
-  HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED = 0,
-  /**
-  * The agent can directly access a buffer located in the pool; the application
-  * does not need to invoke ::hsa_amd_agents_allow_access.
-  */
-  HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT = 1,
-  /**
-  * The agent can directly access a buffer located in the pool, but only if the
-  * application has previously requested access to that buffer using
-  * ::hsa_amd_agents_allow_access.
-  */
-  HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT = 2
-} hsa_amd_memory_pool_access_t;
-
-/**
- * @brief Properties of the relationship between an agent a memory pool.
- */
-typedef enum {
-  /**
-  * Hyper-transport bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
-
-  /**
-  * QPI bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_QPI = 1,
-
-  /**
-  * PCIe bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
-
-  /**
-  * Infiniband bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
-
-  /**
-  * xGMI link type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_XGMI = 4
-
-} hsa_amd_link_info_type_t;
-
-/**
- * @brief Link properties when accessing the memory pool from the specified
- * agent.
- */
-typedef struct hsa_amd_memory_pool_link_info_s {
-  /**
-  * Minimum transfer latency (rounded to ns).
-  */
-  uint32_t min_latency;
-
-  /**
-  * Maximum transfer latency (rounded to ns).
-  */
-  uint32_t max_latency;
-
-  /**
-  * Minimum link interface bandwidth in MB/s.
-  */
-  uint32_t min_bandwidth;
-
-  /**
-  * Maximum link interface bandwidth in MB/s.
-  */
-  uint32_t max_bandwidth;
-
-  /**
-  * Support for 32-bit atomic transactions.
-  */
-  bool atomic_support_32bit;
-
-  /**
-  * Support for 64-bit atomic transactions.
-  */
-  bool atomic_support_64bit;
-
-  /**
-  * Support for cache coherent transactions.
-  */
-  bool coherent_support;
-
-  /**
-  * The type of bus/link.
-  */
-  hsa_amd_link_info_type_t link_type;
-
-  /**
-   * NUMA distance of memory pool relative to querying agent
-   */
-  uint32_t numa_distance;
-} hsa_amd_memory_pool_link_info_t;
-
-/**
- * @brief Properties of the relationship between an agent a memory pool.
- */
-typedef enum {
-  /**
-  * Access to buffers located in the memory pool. The type of this attribute
-  * is ::hsa_amd_memory_pool_access_t.
-  *
-  * An agent can always directly access buffers currently located in a memory
-  * pool that is associated (the memory_pool is one of the values returned by
-  * ::hsa_amd_agent_iterate_memory_pools on the agent) with that agent. If the
-  * buffer is currently located in a memory pool that is not associated with
-  * the agent, and the value returned by this function for the given
-  * combination of agent and memory pool is not
-  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED, the application still needs to invoke
-  * ::hsa_amd_agents_allow_access in order to gain direct access to the buffer.
-  *
-  * If the given agent can directly access buffers the pool, the result is not
-  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is associated with
-  * the agent, or it is of fined-grained type, the result must not be
-  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is not associated
-  * with the agent, and does not reside in the global segment, the result must
-  * be HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED.
-  */
-  HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS = 0,
-
-  /**
-  * Number of links to hop when accessing the memory pool from the specified
-  * agent. The value of this attribute is zero if the memory pool is associated
-  * with the agent, or if the access type is
-  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. The type of this attribute is
-  * uint32_t.
-  */
-  HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS = 1,
-
-  /**
-  * Details of each link hop when accessing the memory pool starting from the
-  * specified agent. The type of this attribute is an array size of
-  * HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS with each element containing
-  * ::hsa_amd_memory_pool_link_info_t.
-  */
-  HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO = 2
-
-} hsa_amd_agent_memory_pool_info_t;
-
-/**
- * @brief Get the current value of an attribute of the relationship between an
- * agent and a memory pool.
- *
- * @param[in] agent Agent.
- *
- * @param[in] memory_pool Memory pool.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to a application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- */
-hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info(
-    hsa_agent_t agent, hsa_amd_memory_pool_t memory_pool,
-    hsa_amd_agent_memory_pool_info_t attribute, void* value);
-
-/**
- * @brief Enable direct access to a buffer from a given set of agents.
- *
- * @details
- *
- * Upon return, only the listed agents and the agent associated with the
- * buffer's memory pool have direct access to the @p ptr.
- *
- * Any agent that has access to the buffer before and after the call to
- * ::hsa_amd_agents_allow_access will also have access while
- * ::hsa_amd_agents_allow_access is in progress.
- *
- * The caller is responsible for ensuring that each agent in the list
- * must be able to access the memory pool containing @p ptr
- * (using ::hsa_amd_agent_memory_pool_get_info with ::HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS attribute),
- * otherwise error code is returned.
- *
- * @param[in] num_agents Size of @p agents.
- *
- * @param[in] agents List of agents. If @p num_agents is 0, this argument is
- * ignored.
- *
- * @param[in] flags A list of bit-field that is used to specify access
- * information in a per-agent basis. This is currently reserved and must be NULL.
- *
- * @param[in] ptr A buffer previously allocated using ::hsa_amd_memory_pool_allocate.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_agents is 0, or @p agents
- * is NULL, @p flags is not NULL, or attempting to enable access to agent(s)
- * because @p ptr is allocated from an inaccessible pool.
- *
- */
-hsa_status_t HSA_API
-    hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents,
-                                const uint32_t* flags, const void* ptr);
-
-/**
- * @brief Query if buffers currently located in some memory pool can be
- * relocated to a destination memory pool.
- *
- * @details If the returned value is non-zero, a migration of a buffer to @p
- * dst_memory_pool using ::hsa_amd_memory_migrate may nevertheless fail due to
- * resource limitations.
- *
- * @param[in] src_memory_pool Source memory pool.
- *
- * @param[in] dst_memory_pool Destination memory pool.
- *
- * @param[out] result Pointer to a memory location where the result of the query
- * is stored. Must not be NULL. If buffers currently located in @p
- * src_memory_pool can be relocated to @p dst_memory_pool, the result is
- * true.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL One of the memory pools is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
- */
-hsa_status_t HSA_API
-    hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool,
-                                    hsa_amd_memory_pool_t dst_memory_pool,
-                                    bool* result);
-
-/**
- * @brief Relocate a buffer to a new memory pool.
- *
- * @details When a buffer is migrated, its virtual address remains the same but
- * its physical contents are moved to the indicated memory pool.
- *
- * After migration, only the agent associated with the destination pool will have access.
- *
- * The caller is also responsible for ensuring that the allocation in the
- * source memory pool where the buffer is currently located can be migrated to the
- * specified destination memory pool (using ::hsa_amd_memory_pool_can_migrate returns a value of true
- * for the source and destination memory pools), otherwise behavior is undefined.
- *
- * The caller must ensure that the buffer is not accessed while it is migrated.
- *
- * @param[in] ptr Buffer to be relocated. The buffer must have been released to system
- * prior to call this API.  The buffer will be released to system upon completion.
- *
- * @param[in] memory_pool Memory pool where to place the buffer.
- *
- * @param[in] flags A bit-field that is used to specify migration
- * information. Must be zero.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The destination memory pool is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
- * allocating the necessary resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p flags is not 0.
- */
-hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr,
-                                            hsa_amd_memory_pool_t memory_pool,
-                                            uint32_t flags);
-
-/**
- *
- * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
- * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
- * locked memory, then the overlap area is kept locked (i.e multiple mappings are permitted). In
- * this case, the same input @p host_ptr may give different locked @p agent_ptr and when it does,
- * they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
- * Accesses to @p agent_ptr are coarse grained.
- *
- * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
- *
- * @param[in] size The size to be locked.
- *
- * @param[in] agents Array of agent handle to gain access to the @p host_ptr.
- * If this parameter is NULL and the @p num_agent is 0, all agents
- * in the platform will gain access to the @p host_ptr.
- *
- * @param[out] agent_ptr Pointer to the location where to store the new address.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
- * allocating the necessary resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agent in @p agents is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or
- * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
- * is NULL but @p num_agent is not 0.
- */
-hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size,
-                                         hsa_agent_t* agents, int num_agent,
-                                         void** agent_ptr);
-
-/**
- *
- * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
- * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
- * locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted).
- * In this case, the same input @p host_ptr may give different locked @p agent_ptr and when it
- * does, they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
- * Acesses to the memory via @p agent_ptr have the same access properties as memory allocated from
- * @p pool as determined by ::hsa_amd_memory_pool_get_info and ::hsa_amd_agent_memory_pool_get_info
- * (ex. coarse/fine grain, platform atomic support, link info).  Physical composition and placement
- * of the memory (ex. page size, NUMA binding) is not changed.
- *
- * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
- *
- * @param[in] size The size to be locked.
- *
- * @param[in] agents Array of agent handle to gain access to the @p host_ptr.
- * If this parameter is NULL and the @p num_agent is 0, all agents
- * in the platform will gain access to the @p host_ptr.
- *
- * @param[in] pool Global memory pool owned by a CPU agent.
- *
- * @param[in] flags A bit-field that is used to specify allocation
- * directives. Reserved parameter, must be 0.
- *
- * @param[out] agent_ptr Pointer to the location where to store the new address.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
- * allocating the necessary resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agent in @p agents is
- * invalid or can not access @p pool.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL @p pool is invalid or not owned
- * by a CPU agent.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or
- * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
- * is NULL but @p num_agent is not 0 or flags is not 0.
- */
-hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
-                                                 int num_agent, hsa_amd_memory_pool_t pool,
-                                                 uint32_t flags, void** agent_ptr);
-
-/**
- *
- * @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock or
- * ::hsa_amd_memory_lock_to_pool.
- *
- * @details The behavior is undefined if the host pointer being unpinned does not
- * match previous pinned address or if the host pointer was already deallocated.
- *
- * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator that was
- * pinned previously via ::hsa_amd_memory_lock or ::hsa_amd_memory_lock_to_pool.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- */
-hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr);
-
-/**
- * @brief Sets the first @p count of uint32_t of the block of memory pointed by
- * @p ptr to the specified @p value.
- *
- * @param[in] ptr Pointer to the block of memory to fill.
- *
- * @param[in] value Value to be set.
- *
- * @param[in] count Number of uint32_t element to be set to the value.
- *
- * @retval HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL or
- * not 4 bytes aligned
- *
- * @retval HSA_STATUS_ERROR_INVALID_ALLOCATION if the given memory
- * region was not allocated with HSA runtime APIs.
- *
- */
-hsa_status_t HSA_API
-    hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count);
-
-/**
- * @brief Maps an interop object into the HSA flat address space and establishes
- * memory residency.  The metadata pointer is valid during the lifetime of the
- * map (until hsa_amd_interop_unmap_buffer is called).
- * Multiple calls to hsa_amd_interop_map_buffer with the same interop_handle
- * result in multiple mappings with potentially different addresses and
- * different metadata pointers.  Concurrent operations on these addresses are
- * not coherent.  Memory must be fenced to system scope to ensure consistency,
- * between mappings and with any views of this buffer in the originating
- * software stack.
- *
- * @param[in] num_agents Number of agents which require access to the memory
- *
- * @param[in] agents List of accessing agents.
- *
- * @param[in] interop_handle Handle of interop buffer (dmabuf handle in Linux)
- *
- * @param [in] flags Reserved, must be 0
- *
- * @param[out] size Size in bytes of the mapped object
- *
- * @param[out] ptr Base address of the mapped object
- *
- * @param[out] metadata_size Size of metadata in bytes, may be NULL
- *
- * @param[out] metadata Pointer to metadata, may be NULL
- *
- * @retval HSA_STATUS_SUCCESS if successfully mapped
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT all other errors
- */
-hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents,
-                                        hsa_agent_t* agents,
-                                        int interop_handle,
-                                        uint32_t flags,
-                                        size_t* size,
-                                        void** ptr,
-                                        size_t* metadata_size,
-                                        const void** metadata);
-
-/**
- * @brief Removes a previously mapped interop object from HSA's flat address space.
- * Ends lifetime for the mapping's associated metadata pointer.
- */
-hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr);
-
-/**
- * @brief Encodes an opaque vendor specific image format.  The length of data
- * depends on the underlying format.  This structure must not be copied as its
- * true length can not be determined.
- */
-typedef struct hsa_amd_image_descriptor_s {
-  /*
-  Version number of the descriptor
-  */
-  uint32_t version;
-
-  /*
-  Vendor and device PCI IDs for the format as VENDOR_ID<<16|DEVICE_ID.
-  */
-  uint32_t deviceID;
-
-  /*
-  Start of vendor specific data.
-  */
-  uint32_t data[1];
-} hsa_amd_image_descriptor_t;
-
-/**
- * @brief Creates an image from an opaque vendor specific image format.
- * Does not modify data at image_data.  Intended initially for
- * accessing interop images.
- *
- * @param agent[in] Agent on which to create the image
- *
- * @param[in] image_descriptor[in] Vendor specific image format
- *
- * @param[in] image_data Pointer to image backing store
- *
- * @param[in] access_permission Access permissions for the image object
- *
- * @param[out] image Created image object.
- *
- * @retval HSA_STATUS_SUCCESS Image created successfully
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT Bad or mismatched descriptor,
- * null image_data, or mismatched access_permission.
- */
-hsa_status_t HSA_API hsa_amd_image_create(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    const hsa_amd_image_descriptor_t *image_layout,
-    const void *image_data,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_t *image
-);
-
-/**
- * @brief Denotes the type of memory in a pointer info query.
- */
-typedef enum {
-  /*
-  Memory is not known to the HSA driver.  Unallocated or unlocked system memory.
-  */
-  HSA_EXT_POINTER_TYPE_UNKNOWN = 0,
-  /*
-  Memory was allocated with an HSA memory allocator.
-  */
-  HSA_EXT_POINTER_TYPE_HSA = 1,
-  /*
-  System memory which has been locked for use with an HSA agent.
-
-  Memory of this type is normal malloc'd memory and is always accessible to
-  the CPU.  Pointer info queries may not include CPU agents in the accessible
-  agents list as the CPU has implicit access.
-  */
-  HSA_EXT_POINTER_TYPE_LOCKED = 2,
-  /*
-  Memory originated in a graphics component and is shared with ROCr.
-  */
-  HSA_EXT_POINTER_TYPE_GRAPHICS = 3,
-  /*
-  Memory has been shared with the local process via ROCr IPC APIs.
-  */
-  HSA_EXT_POINTER_TYPE_IPC = 4
-} hsa_amd_pointer_type_t;
-
-/**
- * @brief Describes a memory allocation known to ROCr.
- * Within a ROCr major version this structure can only grow.
- */
-typedef struct hsa_amd_pointer_info_s {
-  /*
-  Size in bytes of this structure.  Used for version control within a major ROCr
-  revision.  Set to sizeof(hsa_amd_pointer_t) prior to calling
-  hsa_amd_pointer_info.  If the runtime supports an older version of pointer
-  info then size will be smaller on return.  Members starting after the return
-  value of size will not be updated by hsa_amd_pointer_info.
-  */
-  uint32_t size;
-  /*
-  The type of allocation referenced.
-  */
-  hsa_amd_pointer_type_t type;
-  /*
-  Base address at which non-host agents may access the allocation.
-  */
-  void* agentBaseAddress;
-  /*
-  Base address at which the host agent may access the allocation.
-  */
-  void* hostBaseAddress;
-  /*
-  Size of the allocation
-  */
-  size_t sizeInBytes;
-  /*
-  Application provided value.
-  */
-  void* userData;
-  /*
-  Reports an agent which "owns" (ie has preferred access to) the pool in which the allocation was
-  made.  When multiple agents share equal access to a pool (ex: multiple CPU agents, or multi-die
-  GPU boards) any such agent may be returned.
-  */
-  hsa_agent_t agentOwner;
-} hsa_amd_pointer_info_t;
-
-/**
- * @brief Retrieves information about the allocation referenced by the given
- * pointer.  Optionally returns the number and list of agents which can
- * directly access the allocation.
- *
- * @param[in] ptr Pointer which references the allocation to retrieve info for.
- *
- * @param[in, out] info Pointer to structure to be filled with allocation info.
- * Data member size must be set to the size of the structure prior to calling
- * hsa_amd_pointer_info.  On return size will be set to the size of the
- * pointer info structure supported by the runtime, if smaller.  Members
- * beyond the returned value of size will not be updated by the API.
- * Must not be NULL.
- *
- * @param[in] alloc Function pointer to an allocator used to allocate the
- * @p accessible array.  If NULL @p accessible will not be returned.
- *
- * @param[out] num_agents_accessible Recieves the count of agents in
- * @p accessible.  If NULL @p accessible will not be returned.
- *
- * @param[out] accessible Recieves a pointer to the array, allocated by @p alloc,
- * holding the list of agents which may directly access the allocation.
- * May be NULL.
- *
- * @retval HSA_STATUS_SUCCESS Info retrieved successfully
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT NULL in @p ptr or @p info.
- */
-hsa_status_t HSA_API hsa_amd_pointer_info(void* ptr,
-                                          hsa_amd_pointer_info_t* info,
-                                          void* (*alloc)(size_t),
-                                          uint32_t* num_agents_accessible,
-                                          hsa_agent_t** accessible);
-
-/**
- * @brief Associates an arbitrary pointer with an allocation known to ROCr.
- * The pointer can be fetched by hsa_amd_pointer_info in the userData field.
- *
- * @param[in] ptr Pointer to the first byte of an allocation known to ROCr
- * with which to associate @p userdata.
- *
- * @param[in] userdata Abitrary pointer to associate with the allocation.
- *
- * @retval HSA_STATUS_SUCCESS @p userdata successfully stored.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is not known to ROCr.
- */
-hsa_status_t HSA_API hsa_amd_pointer_info_set_userdata(void* ptr,
-                                                       void* userdata);
-
-/**
- * @brief 256-bit process independent identifier for a ROCr shared memory
- * allocation.
- */
-typedef struct hsa_amd_ipc_memory_s {
-  uint32_t handle[8];
-} hsa_amd_ipc_memory_t;
-
-/**
- * @brief Prepares an allocation for interprocess sharing and creates a
- * handle of type hsa_amd_ipc_memory_t uniquely identifying the allocation.  A
- * handle is valid while the allocation it references remains accessible in
- * any process.  In general applications should confirm that a shared memory
- * region has been attached (via hsa_amd_ipc_memory_attach) in the remote
- * process prior to releasing that memory in the local process.
- * Repeated calls for the same allocation may, but are not required to, return
- * unique handles.
- *
- * @param[in] ptr Pointer to memory allocated via ROCr APIs to prepare for
- * sharing.
- *
- * @param[in] len Length in bytes of the allocation to share.
- *
- * @param[out] handle Process independent identifier referencing the shared
- * allocation.
- *
- * @retval HSA_STATUS_SUCCESS allocation is prepared for interprocess sharing.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr does not point to the
- * first byte of an allocation made through ROCr, or len is not the full length
- * of the allocation or handle is NULL.
- */
-hsa_status_t HSA_API hsa_amd_ipc_memory_create(void* ptr, size_t len,
-                                               hsa_amd_ipc_memory_t* handle);
-
-/**
- * @brief Imports shared memory into the local process and makes it accessible
- * by the given agents.  If a shared memory handle is attached multiple times
- * in a process each attach may return a different address.  Each returned
- * address is refcounted and requires a matching number of calls to
- * hsa_amd_ipc_memory_detach to release the shared memory mapping.
- *
- * @param[in] handle Pointer to the identifier for the shared memory.
- *
- * @param[in] len Length of the shared memory to import.
- * Reserved.  Must be the full length of the shared allocation in this version.
- *
- * @param[in] num_agents Count of agents in @p mapping_agents.
- * May be zero if all agents are to be allowed access.
- *
- * @param[in] mapping_agents List of agents to access the shared memory.
- * Ignored if @p num_agents is zero.
- *
- * @param[out] mapped_ptr Recieves a process local pointer to the shared memory.
- *
- * @retval HSA_STATUS_SUCCESS if memory is successfully imported.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is not valid, @p len is
- * incorrect, @p mapped_ptr is NULL, or some agent for which access was
- * requested can not access the shared memory.
- */
-hsa_status_t HSA_API hsa_amd_ipc_memory_attach(
-    const hsa_amd_ipc_memory_t* handle, size_t len,
-    uint32_t num_agents,
-    const hsa_agent_t* mapping_agents,
-    void** mapped_ptr);
-
-/**
- * @brief Decrements the reference count for the shared memory mapping and
- * releases access to shared memory imported with hsa_amd_ipc_memory_attach.
- *
- * @param[in] mapped_ptr Pointer to the first byte of a shared allocation
- * imported with hsa_amd_ipc_memory_attach.
- *
- * @retval HSA_STATUS_SUCCESS if @p mapped_ptr was imported with
- * hsa_amd_ipc_memory_attach.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p mapped_ptr was not imported
- * with hsa_amd_ipc_memory_attach.
- */
-hsa_status_t HSA_API hsa_amd_ipc_memory_detach(void* mapped_ptr);
-
-/**
- * @brief 256-bit process independent identifier for a ROCr IPC signal.
- */
-typedef hsa_amd_ipc_memory_t hsa_amd_ipc_signal_t;
-
-/**
- * @brief Obtains an interprocess sharing handle for a signal.  The handle is
- * valid while the signal it references remains valid in any process.  In
- * general applications should confirm that the signal has been attached (via
- * hsa_amd_ipc_signal_attach) in the remote process prior to destroying that
- * signal in the local process.
- * Repeated calls for the same signal may, but are not required to, return
- * unique handles.
- *
- * @param[in] signal Signal created with attribute HSA_AMD_SIGNAL_IPC.
- *
- * @param[out] handle Process independent identifier referencing the shared
- * signal.
- *
- * @retval HSA_STATUS_SUCCESS @p handle is ready to use for interprocess sharing.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is not a valid signal
- * created with attribute HSA_AMD_SIGNAL_IPC or handle is NULL.
- */
-hsa_status_t HSA_API hsa_amd_ipc_signal_create(hsa_signal_t signal, hsa_amd_ipc_signal_t* handle);
-
-/**
- * @brief Imports an IPC capable signal into the local process.  If an IPC
- * signal handle is attached multiple times in a process each attach may return
- * a different signal handle.  Each returned signal handle is refcounted and
- * requires a matching number of calls to hsa_signal_destroy to release the
- * shared signal.
- *
- * @param[in] handle Pointer to the identifier for the shared signal.
- *
- * @param[out] signal Recieves a process local signal handle to the shared signal.
- *
- * @retval HSA_STATUS_SUCCESS if the signal is successfully imported.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
- *
- * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is not valid.
- */
-hsa_status_t HSA_API hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle,
-                                               hsa_signal_t* signal);
-
-/**
- * @brief GPU system event type.
- */
-typedef enum hsa_amd_event_type_s {
-  /*
-   AMD GPU memory fault.
-   */
-  HSA_AMD_GPU_MEMORY_FAULT_EVENT = 0,
-} hsa_amd_event_type_t;
-
-/**
- * @brief Flags denoting the cause of a memory fault.
- */
-typedef enum {
-  // Page not present or supervisor privilege.
-  HSA_AMD_MEMORY_FAULT_PAGE_NOT_PRESENT = 1 << 0,
-  // Write access to a read-only page.
-  HSA_AMD_MEMORY_FAULT_READ_ONLY = 1 << 1,
-  // Execute access to a page marked NX.
-  HSA_AMD_MEMORY_FAULT_NX = 1 << 2,
-  // GPU attempted access to a host only page.
-  HSA_AMD_MEMORY_FAULT_HOST_ONLY = 1 << 3,
-  // DRAM ECC failure.
-  HSA_AMD_MEMORY_FAULT_DRAMECC = 1 << 4,
-  // Can't determine the exact fault address.
-  HSA_AMD_MEMORY_FAULT_IMPRECISE = 1 << 5,
-  // SRAM ECC failure (ie registers, no fault address).
-  HSA_AMD_MEMORY_FAULT_SRAMECC = 1 << 6,
-  // GPU reset following unspecified hang.
-  HSA_AMD_MEMORY_FAULT_HANG = 1 << 31
-} hsa_amd_memory_fault_reason_t;
-
-/**
- * @brief AMD GPU memory fault event data.
- */
-typedef struct hsa_amd_gpu_memory_fault_info_s {
-  /*
-  The agent where the memory fault occurred.
-  */
-  hsa_agent_t agent;
-  /*
-  Virtual address accessed.
-  */
-  uint64_t virtual_address;
-  /*
-  Bit field encoding the memory access failure reasons. There could be multiple bits set
-  for one fault.  Bits are defined in hsa_amd_memory_fault_reason_t.
-  */
-  uint32_t fault_reason_mask;
-} hsa_amd_gpu_memory_fault_info_t;
-
-/**
- * @brief AMD GPU event data passed to event handler.
- */
-typedef struct hsa_amd_event_s {
-  /*
-  The event type.
-  */
-  hsa_amd_event_type_t event_type;
-  union {
-    /*
-    The memory fault info, only valid when @p event_type is HSA_AMD_GPU_MEMORY_FAULT_EVENT.
-    */
-    hsa_amd_gpu_memory_fault_info_t memory_fault;
-  };
-} hsa_amd_event_t;
-
-typedef hsa_status_t (*hsa_amd_system_event_callback_t)(const hsa_amd_event_t* event, void* data);
-
-/**
- * @brief Register AMD GPU event handler.
- *
- * @param[in] callback Callback to be invoked when an event is triggered.
- * The HSA runtime passes two arguments to the callback: @p event
- * is defined per event by the HSA runtime, and @p data is the user data.
- *
- * @param[in] data User data that is passed to @p callback. May be NULL.
- *
- * @retval HSA_STATUS_SUCCESS The handler has been registered successfully.
- *
- * @retval HSA_STATUS_ERROR An event handler has already been registered.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p event is invalid.
- */
-hsa_status_t HSA_API hsa_amd_register_system_event_handler(hsa_amd_system_event_callback_t callback,
-                                                   void* data);
-
-/**
- * @brief Per-queue dispatch and wavefront scheduling priority.
- */
-typedef enum hsa_amd_queue_priority_s {
-  /*
-  Below normal/high priority compute and all graphics
-  */
-  HSA_AMD_QUEUE_PRIORITY_LOW = 0,
-  /*
-  Above low priority compute, below high priority compute and all graphics
-  */
-  HSA_AMD_QUEUE_PRIORITY_NORMAL = 1,
-  /*
-  Above low/normal priority compute and all graphics
-  */
-  HSA_AMD_QUEUE_PRIORITY_HIGH = 2,
-} hsa_amd_queue_priority_t;
-
-/**
- * @brief Modifies the dispatch and wavefront scheduling prioirty for a
- * given compute queue. The default is HSA_AMD_QUEUE_PRIORITY_NORMAL.
- *
- * @param[in] queue Compute queue to apply new priority to.
- *
- * @param[in] priority Priority to associate with queue.
- *
- * @retval HSA_STATUS_SUCCESS if priority was changed successfully.
- *
- * @retval HSA_STATUS_ERROR_INVALID_QUEUE if queue is not a valid
- * compute queue handle.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if priority is not a valid
- * value from hsa_amd_queue_priority_t.
- */
-hsa_status_t HSA_API hsa_amd_queue_set_priority(hsa_queue_t* queue,
-                                                hsa_amd_queue_priority_t priority);
-
-/**
- * @brief Deallocation notifier function type.
- */
-typedef void (*hsa_amd_deallocation_callback_t)(void* ptr, void* user_data);
-
-/**
- * @brief Registers a deallocation notifier monitoring for release of agent
- * accessible address @p ptr.  If successful, @p callback will be invoked when
- * @p ptr is removed from accessibility from all agents.
- *
- * Notification callbacks are automatically deregistered when they are invoked.
- *
- * Note: The current version supports notifications of address release
- * originating from ::hsa_amd_memory_pool_free.  Support for other address
- * release APIs will follow.
- *
- * @param[in] ptr Agent accessible address to monitor for deallocation.  Passed
- * to @p callback.
- *
- * @param[in] callback Notifier to be invoked when @p ptr is released from
- * agent accessibility.
- *
- * @param[in] user_data User provided value passed to @p callback.  May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The notifier registered successfully
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION @p ptr does not refer to a valid agent accessible
- * address.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL or @p ptr is NULL.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
- * necessary resources
- */
-hsa_status_t HSA_API hsa_amd_register_deallocation_callback(void* ptr,
-                                                    hsa_amd_deallocation_callback_t callback,
-                                                    void* user_data);
-
-/**
- * @brief Removes a deallocation notifier previously registered with
- * ::hsa_amd_register_deallocation_callback.  Arguments must be identical to
- * those given in ::hsa_amd_register_deallocation_callback.
- *
- * @param[in] ptr Agent accessible address which was monitored for deallocation.
- *
- * @param[in] callback Notifier to be removed.
- *
- * @retval ::HSA_STATUS_SUCCESS The notifier has been removed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The given notifier was not registered.
- */
-hsa_status_t HSA_API hsa_amd_deregister_deallocation_callback(void* ptr,
-                                                      hsa_amd_deallocation_callback_t callback);
-
-#ifdef __cplusplus
-}  // end extern "C" block
-#endif
-
-#endif  // header guard
diff --git a/third_party/rocm/include/hsa/hsa_ext_finalize.h b/third_party/rocm/include/hsa/hsa_ext_finalize.h
deleted file mode 100644
index 94c4582..0000000
--- a/third_party/rocm/include/hsa/hsa_ext_finalize.h
+++ /dev/null
@@ -1,531 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-// 
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-// 
-// Developed by:
-// 
-//                 AMD Research and AMD HSA Software Development
-// 
-//                 Advanced Micro Devices, Inc.
-// 
-//                 www.amd.com
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-// 
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
-#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
-
-#include "hsa.h"
-
-#undef HSA_API
-#ifdef HSA_EXPORT_FINALIZER
-#define HSA_API HSA_API_EXPORT
-#else
-#define HSA_API HSA_API_IMPORT
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-struct BrigModuleHeader;
-typedef struct BrigModuleHeader* BrigModule_t;
-
-/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
- *  @{
- */
-
-/**
- * @brief Enumeration constants added to ::hsa_status_t by this extension.
- */
-enum {
-  /**
-   * The HSAIL program is invalid.
-   */
-  HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
-  /**
-   * The HSAIL module is invalid.
-   */
-  HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
-  /**
-   * Machine model or profile of the HSAIL module do not match the machine model
-   * or profile of the HSAIL program.
-   */
-  HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
-  /**
-   * The HSAIL module is already a part of the HSAIL program.
-   */
-  HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
-  /**
-   * Compatibility mismatch between symbol declaration and symbol definition.
-   */
-  HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
-  /**
-   * The finalization encountered an error while finalizing a kernel or
-   * indirect function.
-   */
-  HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
-  /**
-   * Mismatch between a directive in the control directive structure and in
-   * the HSAIL kernel.
-   */
-  HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
-};
-
-/** @} */
-
-/** \defgroup ext-alt-finalizer-program Finalization Program
- *  @{
- */
-
-/**
- * @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
- * the definition of the BrigModule_t type.
- */
-typedef BrigModule_t hsa_ext_module_t;
-
-/**
- * @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
- * modules that collectively define functions and variables used by kernels and
- * indirect functions.
- */
-typedef struct hsa_ext_program_s {
-  /**
-   * Opaque handle.
-   */
-  uint64_t handle;
-} hsa_ext_program_t;
-
-/**
- * @brief Create an empty HSAIL program.
- *
- * @param[in] machine_model Machine model used in the HSAIL program.
- *
- * @param[in] profile Profile used in the HSAIL program.
- *
- * @param[in] default_float_rounding_mode Default float rounding mode used in
- * the HSAIL program.
- *
- * @param[in] options Vendor-specific options. May be NULL.
- *
- * @param[out] program Memory location where the HSA runtime stores the newly
- * created HSAIL program handle.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
- * resources required for the operation.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
- * @p profile is invalid, @p default_float_rounding_mode is invalid, or
- * @p program is NULL.
- */
-hsa_status_t HSA_API hsa_ext_program_create(
-    hsa_machine_model_t machine_model,
-    hsa_profile_t profile,
-    hsa_default_float_rounding_mode_t default_float_rounding_mode,
-    const char *options,
-    hsa_ext_program_t *program);
-
-/**
- * @brief Destroy a HSAIL program.
- *
- * @details The HSAIL program handle becomes invalid after it has been
- * destroyed. Code object handles produced by ::hsa_ext_program_finalize are
- * still valid after the HSAIL program has been destroyed, and can be used as
- * intended. Resources allocated outside and associated with the HSAIL program
- * (such as HSAIL modules that are added to the HSAIL program) can be released
- * after the finalization program has been destroyed.
- *
- * @param[in] program HSAIL program.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
- * invalid.
- */
-hsa_status_t HSA_API hsa_ext_program_destroy(
-    hsa_ext_program_t program);
-
-/**
- * @brief Add a HSAIL module to an existing HSAIL program.
- *
- * @details The HSA runtime does not perform a deep copy of the HSAIL module
- * upon addition. Instead, it stores a pointer to the HSAIL module. The
- * ownership of the HSAIL module belongs to the application, which must ensure
- * that @p module is not released before destroying the HSAIL program.
- *
- * The HSAIL module is successfully added to the HSAIL program if @p module is
- * valid, if all the declarations and definitions for the same symbol are
- * compatible, and if @p module specify machine model and profile that matches
- * the HSAIL program.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] module HSAIL module. The application can add the same HSAIL module
- * to @p program at most once. The HSAIL module must specify the same machine
- * model and profile as @p program. If the floating-mode rounding mode of @p
- * module is not default, then it should match that of @p program.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
- * resources required for the operation.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
- * module does not match machine model of @p program, or the profile of @p
- * module does not match profile of @p program.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
- * already a part of the HSAIL program.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
- * definition compatibility mismatch. See the symbol compatibility rules in the
- * HSA Programming Reference Manual.
- */
-hsa_status_t HSA_API hsa_ext_program_add_module(
-    hsa_ext_program_t program,
-    hsa_ext_module_t module);
-
-/**
- * @brief Iterate over the HSAIL modules in a program, and invoke an
- * application-defined callback on every iteration.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] callback Callback to be invoked once per HSAIL module in the
- * program. The HSA runtime passes three arguments to the callback: the program,
- * a HSAIL module, and the application data.  If @p callback returns a status
- * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
- * stops and ::hsa_ext_program_iterate_modules returns that status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t HSA_API hsa_ext_program_iterate_modules(
-    hsa_ext_program_t program,
-    hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
-                             void* data),
-    void* data);
-
-/**
- * @brief HSAIL program attributes.
- */
-typedef enum {
-  /**
-   * Machine model specified when the HSAIL program was created. The type
-   * of this attribute is ::hsa_machine_model_t.
-   */
-  HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
-  /**
-   * Profile specified when the HSAIL program was created. The type of
-   * this attribute is ::hsa_profile_t.
-   */
-  HSA_EXT_PROGRAM_INFO_PROFILE = 1,
-  /**
-   * Default float rounding mode specified when the HSAIL program was
-   * created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
-   */
-  HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
-} hsa_ext_program_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given HSAIL program.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behaviour is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * HSAIL program attribute, or @p value is NULL.
- */
-hsa_status_t HSA_API hsa_ext_program_get_info(
-    hsa_ext_program_t program,
-    hsa_ext_program_info_t attribute,
-    void *value);
-
-/**
- * @brief Finalizer-determined call convention.
- */
-typedef enum {
- /**
-  * Finalizer-determined call convention.
-  */
-  HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
-} hsa_ext_finalizer_call_convention_t;
-
-/**
- * @brief Control directives specify low-level information about the
- * finalization process.
- */
-typedef struct hsa_ext_control_directives_s {
-  /**
-   * Bitset indicating which control directives are enabled. The bit assigned to
-   * a control directive is determined by the corresponding value in
-   * BrigControlDirective.
-   *
-   * If a control directive is disabled, its corresponding field value (if any)
-   * must be 0. Control directives that are only present or absent (such as
-   * partial workgroups) have no corresponding field as the presence of the bit
-   * in this mask is sufficient.
-   */
-  uint64_t control_directives_mask;
-  /**
-   * Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
-   * assigned to an HSAIL exception is determined by the corresponding value
-   * in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
-   * control directive, the finalizer uses the union of the two masks.
-   */
-  uint16_t break_exceptions_mask;
-  /**
-   * Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
-   * bit assigned to an HSAIL exception is determined by the corresponding value
-   * in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
-   * control directive, the finalizer uses the union of the two masks.
-   */
-  uint16_t detect_exceptions_mask;
-  /**
-   * Maximum size (in bytes) of dynamic group memory that will be allocated by
-   * the application for any dispatch of the kernel.  If the kernel contains a
-   * maxdynamicsize control directive, the two values should match.
-   */
-  uint32_t max_dynamic_group_size;
-  /**
-   * Maximum number of grid work-items that will be used by the application to
-   * launch the kernel. If the kernel contains a maxflatgridsize control
-   * directive, the value of @a max_flat_grid_size must not be greater than the
-   * value of the directive, and takes precedence.
-   *
-   * The value specified for maximum absolute grid size must be greater than or
-   * equal to the product of the values specified by @a required_grid_size.
-   *
-   * If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
-   * control_directives_mask, this field must be greater than 0.
-   */
-  uint64_t max_flat_grid_size;
-  /**
-   * Maximum number of work-group work-items that will be used by the
-   * application to launch the kernel. If the kernel contains a
-   * maxflatworkgroupsize control directive, the value of @a
-   * max_flat_workgroup_size must not be greater than the value of the
-   * directive, and takes precedence.
-   *
-   * The value specified for maximum absolute grid size must be greater than or
-   * equal to the product of the values specified by @a required_workgroup_size.
-   *
-   * If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
-   * control_directives_mask, this field must be greater than 0.
-   */
-  uint32_t max_flat_workgroup_size;
-  /**
-   * Reserved. Must be 0.
-   */
-  uint32_t reserved1;
-  /**
-   * Grid size that will be used by the application in any dispatch of the
-   * kernel. If the kernel contains a requiredgridsize control directive, the
-   * dimensions should match.
-   *
-   * The specified grid size must be consistent with @a required_workgroup_size
-   * and @a required_dim. Also, the product of the three dimensions must not
-   * exceed @a max_flat_grid_size. Note that the listed invariants must hold
-   * only if all the corresponding control directives are enabled.
-   *
-   * If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
-   * control_directives_mask, the three dimension values must be greater than 0.
-   */
-  uint64_t required_grid_size[3];
-  /**
-   * Work-group size that will be used by the application in any dispatch of the
-   * kernel. If the kernel contains a requiredworkgroupsize control directive,
-   * the dimensions should match.
-   *
-   * The specified work-group size must be consistent with @a required_grid_size
-   * and @a required_dim. Also, the product of the three dimensions must not
-   * exceed @a max_flat_workgroup_size. Note that the listed invariants must
-   * hold only if all the corresponding control directives are enabled.
-   *
-   * If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
-   * control_directives_mask, the three dimension values must be greater than 0.
-   */
-  hsa_dim3_t required_workgroup_size;
-  /**
-   * Number of dimensions that will be used by the application to launch the
-   * kernel. If the kernel contains a requireddim control directive, the two
-   * values should match.
-   *
-   * The specified dimensions must be consistent with @a required_grid_size and
-   * @a required_workgroup_size. This invariant must hold only if all the
-   * corresponding control directives are enabled.
-   *
-   * If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
-   * control_directives_mask, this field must be 1, 2, or 3.
-   */
-  uint8_t required_dim;
-  /**
-   * Reserved. Must be 0.
-   */
-  uint8_t reserved2[75];
-} hsa_ext_control_directives_t;
-
-/**
- * @brief Finalize an HSAIL program for a given instruction set architecture.
- *
- * @details Finalize all of the kernels and indirect functions that belong to
- * the same HSAIL program for a specific instruction set architecture (ISA). The
- * transitive closure of all functions specified by call or scall must be
- * defined. Kernels and indirect functions that are being finalized must be
- * defined. Kernels and indirect functions that are referenced in kernels and
- * indirect functions being finalized may or may not be defined, but must be
- * declared. All the global/readonly segment variables that are referenced in
- * kernels and indirect functions being finalized may or may not be defined, but
- * must be declared.
- *
- * @param[in] program HSAIL program.
- *
- * @param[in] isa Instruction set architecture to finalize for.
- *
- * @param[in] call_convention A call convention used in a finalization. Must
- * have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
- * and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
- * isa (not inclusive).
- *
- * @param[in] control_directives Low-level control directives that influence
- * the finalization process.
- *
- * @param[in] options Vendor-specific options. May be NULL.
- *
- * @param[in] code_object_type Type of code object to produce.
- *
- * @param[out] code_object Code object generated by the Finalizer, which
- * contains the machine code for the kernels and indirect functions in the HSAIL
- * program. The code object is independent of the HSAIL module that was used to
- * generate it.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
- * resources required for the operation.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
- * the control directive structure and in the HSAIL kernel mismatch, or if the
- * same directive is used with a different value in one of the functions used by
- * this kernel.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
- * encountered an error while compiling a kernel or an indirect function.
- */
-hsa_status_t HSA_API hsa_ext_program_finalize(
-    hsa_ext_program_t program,
-    hsa_isa_t isa,
-    int32_t call_convention,
-    hsa_ext_control_directives_t control_directives,
-    const char *options,
-    hsa_code_object_type_t code_object_type,
-    hsa_code_object_t *code_object);
-
-/** @} */
-
-#define hsa_ext_finalizer_1_00
-
-typedef struct hsa_ext_finalizer_1_00_pfn_s {
-  hsa_status_t (*hsa_ext_program_create)(
-      hsa_machine_model_t machine_model, hsa_profile_t profile,
-      hsa_default_float_rounding_mode_t default_float_rounding_mode,
-      const char *options, hsa_ext_program_t *program);
-
-  hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
-
-  hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
-                                                 hsa_ext_module_t module);
-
-  hsa_status_t (*hsa_ext_program_iterate_modules)(
-      hsa_ext_program_t program,
-      hsa_status_t (*callback)(hsa_ext_program_t program,
-                               hsa_ext_module_t module, void *data),
-      void *data);
-
-  hsa_status_t (*hsa_ext_program_get_info)(
-      hsa_ext_program_t program, hsa_ext_program_info_t attribute,
-      void *value);
-
-  hsa_status_t (*hsa_ext_program_finalize)(
-      hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
-      hsa_ext_control_directives_t control_directives, const char *options,
-      hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
-} hsa_ext_finalizer_1_00_pfn_t;
-
-#ifdef __cplusplus
-} // extern "C" block
-#endif // __cplusplus
-
-#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
diff --git a/third_party/rocm/include/hsa/hsa_ext_image.h b/third_party/rocm/include/hsa/hsa_ext_image.h
deleted file mode 100644
index b25f168..0000000
--- a/third_party/rocm/include/hsa/hsa_ext_image.h
+++ /dev/null
@@ -1,1454 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-// 
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-// 
-// Developed by:
-// 
-//                 AMD Research and AMD HSA Software Development
-// 
-//                 Advanced Micro Devices, Inc.
-// 
-//                 www.amd.com
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-// 
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef HSA_EXT_IMAGE_H
-#define HSA_EXT_IMAGE_H
-
-#include "hsa.h"
-
-#undef HSA_API
-#ifdef HSA_EXPORT_IMAGES
-#define HSA_API HSA_API_EXPORT
-#else
-#define HSA_API HSA_API_IMPORT
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /*__cplusplus*/ 
-
-/** \defgroup ext-images Images and Samplers
- *  @{
- */
-
-/**
- * @brief Enumeration constants added to ::hsa_status_t by this extension.
- *
- * @remark Additions to hsa_status_t
- */
-enum {
-    /**
-     * Image format is not supported.
-     */
-    HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED = 0x3000,
-    /**
-     * Image size is not supported.
-     */
-    HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED = 0x3001,
-    /**
-     * Image pitch is not supported or invalid.
-     */
-    HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED = 0x3002,
-    /**
-     * Sampler descriptor is not supported or invalid.
-     */
-    HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED = 0x3003
-};
-
-/**
- * @brief Enumeration constants added to ::hsa_agent_info_t by this
- * extension.
- *
- * @remark Additions to hsa_agent_info_t
- */
-enum {
-  /**
-   * Maximum number of elements in 1D images. Must be at least 16384. The type
-   * of this attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS = 0x3000,
-  /**
-   * Maximum number of elements in 1DA images. Must be at least 16384. The type
-   * of this attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS = 0x3001,
-  /**
-   * Maximum number of elements in 1DB images. Must be at least 65536. The type
-   * of this attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS = 0x3002,
-  /**
-   * Maximum dimensions (width, height) of 2D images, in image elements. The X
-   * and Y maximums must be at least 16384. The type of this attribute is
-   * size_t[2].
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS = 0x3003,
-  /**
-   * Maximum dimensions (width, height) of 2DA images, in image elements. The X
-   * and Y maximums must be at least 16384. The type of this attribute is
-   * size_t[2].
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS = 0x3004,
-  /**
-   * Maximum dimensions (width, height) of 2DDEPTH images, in image
-   * elements. The X and Y maximums must be at least 16384. The type of this
-   * attribute is size_t[2].
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS = 0x3005,
-  /**
-   * Maximum dimensions (width, height) of 2DADEPTH images, in image
-   * elements. The X and Y maximums must be at least 16384. The type of this
-   * attribute is size_t[2].
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS = 0x3006,
-  /**
-   * Maximum dimensions (width, height, depth) of 3D images, in image
-   * elements. The maximum along any dimension must be at least 2048. The type
-   * of this attribute is size_t[3].
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS = 0x3007,
-  /**
-   * Maximum number of image layers in a image array. Must be at least 2048. The
-   * type of this attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS = 0x3008,
-  /**
-   * Maximum number of read-only image handles that can be created for an agent at any one
-   * time. Must be at least 128. The type of this attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES = 0x3009,
-  /**
-   * Maximum number of write-only and read-write image handles (combined) that
-   * can be created for an agent at any one time. Must be at least 64. The type of this
-   * attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES = 0x300A,
-  /**
-   * Maximum number of sampler handlers that can be created for an agent at any one
-   * time. Must be at least 16. The type of this attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS = 0x300B,
-  /**
-   * Image pitch alignment. The agent only supports linear image data
-   * layouts with a row pitch that is a multiple of this value. Must be
-   * a power of 2. The type of this attribute is size_t.
-   */
-  HSA_EXT_AGENT_INFO_IMAGE_LINEAR_ROW_PITCH_ALIGNMENT = 0x300C
-};
-
-/**
- * @brief Image handle, populated by ::hsa_ext_image_create or
- * ::hsa_ext_image_create_with_layout. Image
- * handles are only unique within an agent, not across agents.
- *
- */
-typedef struct hsa_ext_image_s {
-  /**
-   *  Opaque handle. For a given agent, two handles reference the same object of
-   *  the enclosing type if and only if they are equal.
-   */
-    uint64_t handle;
-
-} hsa_ext_image_t;
-
-/**
- * @brief Geometry associated with the image. This specifies the
- * number of image dimensions and whether the image is an image
- * array. See the <em>Image Geometry</em> section in the <em>HSA
- * Programming Reference Manual</em> for definitions on each
- * geometry. The enumeration values match the BRIG type @p
- * hsa_ext_brig_image_geometry_t.
- */
-typedef enum {
-/**
-   * One-dimensional image addressed by width coordinate.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_1D = 0,
-
-  /**
-   * Two-dimensional image addressed by width and height coordinates.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_2D = 1,
-
-  /**
-   * Three-dimensional image addressed by width, height, and depth coordinates.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_3D = 2,
-
-  /**
-   * Array of one-dimensional images with the same size and format. 1D arrays
-   * are addressed by width and index coordinate.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_1DA = 3,
-
-  /**
-   * Array of two-dimensional images with the same size and format. 2D arrays
-   * are addressed by width,  height, and index coordinates.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_2DA = 4,
-
-  /**
-   * One-dimensional image addressed by width coordinate. It has
-   * specific restrictions compared to ::HSA_EXT_IMAGE_GEOMETRY_1D. An
-   * image with an opaque image data layout will always use a linear
-   * image data layout, and one with an explicit image data layout
-   * must specify ::HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_1DB = 5,
-
-  /**
-   * Two-dimensional depth image addressed by width and height coordinates.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_2DDEPTH = 6,
-
-  /**
-   * Array of two-dimensional depth images with the same size and format. 2D
-   * arrays are addressed by width, height, and index coordinates.
-   */
-  HSA_EXT_IMAGE_GEOMETRY_2DADEPTH = 7
-} hsa_ext_image_geometry_t;
-
-/**
- * @brief Channel type associated with the elements of an image. See
- * the <em>Channel Type</em> section in the <em>HSA Programming Reference
- * Manual</em> for definitions on each channel type. The
- * enumeration values and definition match the BRIG type @p
- * hsa_ext_brig_image_channel_type_t.
- */
-typedef enum {
-    HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
-    HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15
-} hsa_ext_image_channel_type_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_image_channel_type_t constants.
- */
-typedef uint32_t hsa_ext_image_channel_type32_t;
-    
-/**
- *
- * @brief Channel order associated with the elements of an image. See
- * the <em>Channel Order</em> section in the <em>HSA Programming Reference
- * Manual</em> for definitions on each channel order. The
- * enumeration values match the BRIG type @p
- * hsa_ext_brig_image_channel_order_t.
- */
-typedef enum {
-    HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18,
-    HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
-} hsa_ext_image_channel_order_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_image_channel_order_t constants.
- */
-typedef uint32_t hsa_ext_image_channel_order32_t;
-    
-
-/**
- * @brief Image format.
- */
-typedef struct hsa_ext_image_format_s {
-  /**
-    * Channel type.
-    */
-    hsa_ext_image_channel_type32_t channel_type;
-
-   /**
-    * Channel order.
-    */
-    hsa_ext_image_channel_order32_t channel_order;
-} hsa_ext_image_format_t;
-
-/**
- * @brief Implementation independent image descriptor.
- */
-typedef struct hsa_ext_image_descriptor_s {
-    /**
-     * Image geometry.
-     */
-    hsa_ext_image_geometry_t geometry;
-    /**
-     * Width of the image, in components.
-     */
-    size_t width;
-    /**
-     * Height of the image, in components. Only used if the geometry is
-     * ::HSA_EXT_IMAGE_GEOMETRY_2D, ::HSA_EXT_IMAGE_GEOMETRY_3D,
-     * HSA_EXT_IMAGE_GEOMETRY_2DA, HSA_EXT_IMAGE_GEOMETRY_2DDEPTH, or
-     * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
-     */
-    size_t height;
-    /**
-     * Depth of the image, in components. Only used if the geometry is
-     * ::HSA_EXT_IMAGE_GEOMETRY_3D, otherwise must be 0.
-     */
-    size_t depth;
-    /**
-     * Number of image layers in the image array. Only used if the geometry is
-     * ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
-     * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
-     */
-    size_t array_size;
-    /**
-     * Image format.
-     */
-    hsa_ext_image_format_t format;
-} hsa_ext_image_descriptor_t;
-
-/**
- * @brief Image capability.
- */
-typedef enum  {
-   /**
-    * Images of this geometry, format, and layout are not supported by
-    * the agent.
-    */
-    HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED = 0x0,
-   /**
-    * Read-only images of this geometry, format, and layout are
-    * supported by the agent.
-    */
-    HSA_EXT_IMAGE_CAPABILITY_READ_ONLY = 0x1,
-   /**
-    * Write-only images of this geometry, format, and layout are
-    * supported by the agent.
-    */
-    HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY = 0x2,
-   /**
-    * Read-write images of this geometry, format, and layout are
-    * supported by the agent.
-    */
-    HSA_EXT_IMAGE_CAPABILITY_READ_WRITE = 0x4,
-   /**
-    * @deprecated Images of this geometry, format, and layout can be accessed from
-    * read-modify-write atomic operations in the agent.
-    */
-    HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE = 0x8,
-    /**
-    * Images of this geometry, format, and layout are guaranteed to
-    * have a consistent data layout regardless of how they are
-    * accessed by the associated agent.
-    */
-    HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT = 0x10
-} hsa_ext_image_capability_t;
-
-/**
- * @brief Image data layout.
- *
- * @details An image data layout denotes such aspects of image data
- * layout as tiling and organization of channels in memory. Some image
- * data layouts may only apply to specific image geometries, formats,
- * and access permissions. Different agents may support different
- * image layout identifiers, including vendor specific layouts. Note
- * that an agent may not support the same image data layout for
- * different access permissions to images with the same image
- * geometry, size, and format. If multiple agents support the same
- * image data layout then it is possible to use separate image handles
- * for each agent that references the same image data.
- */
-
-typedef enum  {
-   /**
-    * An implementation specific opaque image data layout which can
-    * vary depending on the agent, geometry, image format, image size,
-    * and access permissions.
-    */
-    HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE = 0x0,
-   /**
-    * The image data layout is specified by the following rules in
-    * ascending byte address order. For a 3D image, 2DA image array,
-    * or 1DA image array, the image data is stored as a linear sequence
-    * of adjacent 2D image slices, 2D images, or 1D images
-    * respectively, spaced according to the slice pitch. Each 2D image
-    * is stored as a linear sequence of adjacent image rows, spaced
-    * according to the row pitch. Each 1D or 1DB image is stored as a
-    * single image row. Each image row is stored as a linear sequence
-    * of image elements. Each image element is stored as a linear
-    * sequence of image components specified by the left to right
-    * channel order definition. Each image component is stored using
-    * the memory type specified by the channel type.
-    *
-    * The 1DB image geometry always uses the linear image data layout.
-    */
-    HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR = 0x1
-} hsa_ext_image_data_layout_t;
-
-/**
- * @brief Retrieve the supported image capabilities for a given combination of
- * agent, geometry, and image format for an image created with an opaque image
- * data layout.
- *
- * @param[in] agent Agent to be associated with the image handle.
- *
- * @param[in] geometry Geometry.
- *
- * @param[in] image_format Pointer to an image format. Must not be NULL.
- *
- * @param[out] capability_mask Pointer to a memory location where the HSA
- * runtime stores a bit-mask of supported image capability
- * (::hsa_ext_image_capability_t) values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_format is
- * NULL, or @p capability_mask is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_get_capability(
-    hsa_agent_t agent,
-    hsa_ext_image_geometry_t geometry,
-    const hsa_ext_image_format_t *image_format,
-    uint32_t *capability_mask);
-
-/**
- * @brief Retrieve the supported image capabilities for a given combination of
- * agent, geometry, image format, and image layout for an image created with
- * an explicit image data layout.
- *
- * @param[in] agent Agent to be associated with the image handle.
- *
- * @param[in] geometry Geometry.
- *
- * @param[in] image_format Pointer to an image format. Must not be NULL.
- *
- * @param[in] image_data_layout The image data layout.
- * It is invalid to use ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use
- * ::hsa_ext_image_get_capability instead.
- *
- * @param[out] capability_mask Pointer to a memory location where the HSA
- * runtime stores a bit-mask of supported image capability
- * (::hsa_ext_image_capability_t) values. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_format is
- * NULL, @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
- * or @p capability_mask is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_get_capability_with_layout(
-    hsa_agent_t agent,
-    hsa_ext_image_geometry_t geometry,
-    const hsa_ext_image_format_t *image_format,
-    hsa_ext_image_data_layout_t image_data_layout,
-    uint32_t *capability_mask);
-
-/**
- * @brief Agent specific image size and alignment requirements, populated by
- * ::hsa_ext_image_data_get_info and ::hsa_ext_image_data_get_info_with_layout.
- */
-typedef struct hsa_ext_image_data_info_s {
-  /**
-   * Image data size, in bytes.
-   */
-  size_t size;
-
-  /**
-   * Image data alignment, in bytes. Must always be a power of 2.
-   */
-  size_t alignment;
-
-} hsa_ext_image_data_info_t;
-
-/**
- * @brief Retrieve the image data requirements for a given combination of agent, image
- * descriptor, and access permission for an image created with an opaque image
- * data layout.
- *
- * @details The optimal image data size and alignment requirements may
- * vary depending on the image attributes specified in @p
- * image_descriptor, the @p access_permission, and the @p agent. Also,
- * different implementations of the HSA runtime may return different
- * requirements for the same input values.
- *
- * The implementation must return the same image data requirements for
- * different access permissions with matching image descriptors as long
- * as ::hsa_ext_image_get_capability reports
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT. Image
- * descriptors match if they have the same values, with the exception
- * that s-form channel orders match the corresponding non-s-form
- * channel order and vice versa.
- *
- * @param[in] agent Agent to be associated with the image handle.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by @p agent. The access permission defines how the agent
- * is allowed to access the image and must match the corresponding
- * HSAIL image handle type. The @p agent must support the image format
- * specified in @p image_descriptor for the given @p
- * access_permission.
- *
- * @param[out] image_data_info Memory location where the runtime stores the
- * size and alignment requirements. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The @p
- * agent does not support the image format specified by @p
- * image_descriptor with the specified @p access_permission.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
- * does not support the image dimensions specified by @p
- * image_descriptor with the specified @p access_permission.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
- * access_permission is not a valid access permission value, or @p
- * image_data_info is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_data_get_info(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_data_info_t *image_data_info);
-
-/**
- * @brief Retrieve the image data requirements for a given combination of
- * image descriptor, access permission, image data layout, image data row pitch,
- * and image data slice pitch for an image created with an explicit image
- * data layout.
- *
- * @details The image data size and alignment requirements may vary
- * depending on the image attributes specified in @p image_descriptor,
- * the @p access_permission, and the image layout. However, different
- * implementations of the HSA runtime will return the same
- * requirements for the same input values.
- *
- * The implementation must return the same image data requirements for
- * different access permissions with matching image descriptors and
- * matching image layouts as long as ::hsa_ext_image_get_capability
- * reports
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT. Image
- * descriptors match if they have the same values, with the exception
- * that s-form channel orders match the corresponding non-s-form
- * channel order and vice versa. Image layouts match if they are the
- * same image data layout and use the same image row and slice pitch
- * values.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by an agent. The access permission defines how the agent
- * is allowed to access the image and must match the corresponding
- * HSAIL image handle type.
- *
- * @param[in] image_data_layout The image data layout to use.
- * It is invalid to use ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use
- * ::hsa_ext_image_data_get_info instead.
- *
- * @param[in] image_data_row_pitch The size in bytes for a single row
- * of the image in the image data. If 0 is specified then the default
- * row pitch value is used: image width * image element byte size.
- * The value used must be greater than or equal to the default row
- * pitch, and be a multiple of the image element byte size. For the
- * linear image layout it must also be a multiple of the image linear
- * row pitch alignment for the agents that will access the image data
- * using image instructions.
- *
- * @param[in] image_data_slice_pitch The size in bytes of a single
- * slice of a 3D image, or the size in bytes of each image layer in an
- * image array in the image data. If 0 is specified then the default
- * slice pitch value is used: row pitch * height if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH; row pitch if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_1DA; and 0 otherwise. The value used must
- * be 0 if the default slice pitch is 0, be greater than or equal to
- * the default slice pitch, and be a multiple of the row pitch.
- *
- * @param[out] image_data_info Memory location where the runtime stores the
- * size and alignment requirements. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The image
- * format specified by @p image_descriptor is not supported for the
- * @p access_permission and @p image_data_layout specified.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The image
- * dimensions specified by @p image_descriptor are not supported for
- * the @p access_permission and @p image_data_layout specified.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED The row and
- * slice pitch specified by @p image_data_row_pitch and @p
- * image_data_slice_pitch are invalid or not supported.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is
- * NULL, @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
- * or @p image_data_info is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_data_get_info_with_layout(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_data_layout_t image_data_layout,
-    size_t image_data_row_pitch,
-    size_t image_data_slice_pitch,
-    hsa_ext_image_data_info_t *image_data_info);
-
-/**
- * @brief Creates an agent specific image handle to an image with an
- * opaque image data layout.
- *
- * @details Images with an opaque image data layout created with
- * different access permissions but matching image descriptors and
- * same agent can share the same image data if
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported
- * by ::hsa_ext_image_get_capability for the image format specified in
- * the image descriptor. Image descriptors match if they have the same
- * values, with the exception that s-form channel orders match the
- * corresponding non-s-form channel order and vice versa.
- *
- * If necessary, an application can use image operations (import,
- * export, copy, clear) to prepare the image for the intended use
- * regardless of the access permissions.
- *
- * @param[in] agent Agent to be associated with the image handle created.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] image_data Image data buffer that must have been allocated
- * according to the size and alignment requirements dictated by
- * ::hsa_ext_image_data_get_info. Must not be NULL.
- *
- * Any previous memory contents are preserved upon creation. The application is
- * responsible for ensuring that the lifetime of the image data exceeds that of
- * all the associated images.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by agent. The access permission defines how the agent
- * is allowed to access the image using the image handle created and
- * must match the corresponding HSAIL image handle type. The agent
- * must support the image format specified in @p image_descriptor for
- * the given @p access_permission.
- *
- * @param[out] image Pointer to a memory location where the HSA runtime stores
- * the newly created image handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent
- * does not have the capability to support the image format contained
- * in @p image_descriptor using the specified @p access_permission.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
- * does not support the image dimensions specified by @p
- * image_descriptor using the specified @p access_permission.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources (for example, the agent does
- * not support the creation of more image handles with the given
- * @p access_permission).
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
- * image_data is NULL, @p image_data does not have a valid alignment,
- * @p access_permission is not a valid access permission
- * value, or @p image is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_create(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    const void *image_data,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_t *image);
-
-/**
- * @brief Creates an agent specific image handle to an image with an explicit
- * image data layout.
- *
- * @details Images with an explicit image data layout created with
- * different access permissions but matching image descriptors and
- * matching image layout can share the same image data if
- * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported
- * by ::hsa_ext_image_get_capability_with_layout for the image format
- * specified in the image descriptor and specified image data
- * layout. Image descriptors match if they have the same values, with
- * the exception that s-form channel orders match the corresponding
- * non-s-form channel order and vice versa. Image layouts match if
- * they are the same image data layout and use the same image row and
- * slice values.
- *
- * If necessary, an application can use image operations (import, export, copy,
- * clear) to prepare the image for the intended use regardless of the access
- * permissions.
- *
- * @param[in] agent Agent to be associated with the image handle created.
- *
- * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
- *
- * @param[in] image_data Image data buffer that must have been allocated
- * according to the size and alignment requirements dictated by
- * ::hsa_ext_image_data_get_info_with_layout. Must not be NULL.
- *
- * Any previous memory contents are preserved upon creation. The application is
- * responsible for ensuring that the lifetime of the image data exceeds that of
- * all the associated images.
- *
- * @param[in] access_permission Access permission of the image when
- * accessed by the agent. The access permission defines how the agent
- * is allowed to access the image and must match the corresponding
- * HSAIL image handle type. The agent must support the image format
- * specified in @p image_descriptor for the given @p access_permission
- * and @p image_data_layout.
- *
- * @param[in] image_data_layout The image data layout to use for the
- * @p image_data. It is invalid to use
- * ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use ::hsa_ext_image_create
- * instead.
- *
- * @param[in] image_data_row_pitch The size in bytes for a single row
- * of the image in the image data. If 0 is specified then the default
- * row pitch value is used: image width * image element byte size.
- * The value used must be greater than or equal to the default row
- * pitch, and be a multiple of the image element byte size. For the
- * linear image layout it must also be a multiple of the image linear
- * row pitch alignment for the agents that will access the image data
- * using image instructions.
- *
- * @param[in] image_data_slice_pitch The size in bytes of a single
- * slice of a 3D image, or the size in bytes of each image layer in an
- * image array in the image data. If 0 is specified then the default
- * slice pitch value is used: row pitch * height if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH; row pitch if geometry is
- * ::HSA_EXT_IMAGE_GEOMETRY_1DA; and 0 otherwise. The value used must
- * be 0 if the default slice pitch is 0, be greater than or equal to
- * the default slice pitch, and be a multiple of the row pitch.
- *
- * @param[out] image Pointer to a memory location where the HSA runtime stores
- * the newly created image handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does
- * not have the capability to support the image format contained in the image
- * descriptor using the specified @p access_permission and @p image_data_layout.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
- * does not support the image dimensions specified by @p
- * image_descriptor using the specified @p access_permission and @p
- * image_data_layout.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED The agent does
- * not support the row and slice pitch specified by @p image_data_row_pitch
- * and @p image_data_slice_pitch, or the values are invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources (for example, the agent does
- * not support the creation of more image handles with the given
- * @p access_permission).
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
- * image_data is NULL, @p image_data does not have a valid alignment,
- * @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
- * or @p image is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_create_with_layout(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    const void *image_data,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_data_layout_t image_data_layout,
-    size_t image_data_row_pitch,
-    size_t image_data_slice_pitch,
-    hsa_ext_image_t *image);
-
-/**
- * @brief Destroy an image handle previously created using ::hsa_ext_image_create or
- * ::hsa_ext_image_create_with_layout.
- *
- * @details Destroying the image handle does not free the associated image data,
- * or modify its contents. The application should not destroy an image handle while
- * there are references to it queued for execution or currently being used in a
- * kernel dispatch.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] image Image handle to destroy.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- */
-hsa_status_t HSA_API hsa_ext_image_destroy(
-    hsa_agent_t agent,
-    hsa_ext_image_t image);
-
-/**
- * @brief Copies a portion of one image (the source) to another image (the
- * destination).
- *
- * @details The source and destination image formats should be the
- * same, with the exception that s-form channel orders match the
- * corresponding non-s-form channel order and vice versa. For example,
- * it is allowed to copy a source image with a channel order of
- * HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB to a destination image with a
- * channel order of HSA_EXT_IMAGE_CHANNEL_ORDER_RGB.
- *
- * The source and destination images do not have to be of the same geometry and
- * appropriate scaling is performed by the HSA runtime. It is possible to copy
- * subregions between any combinations of source and destination geometries, provided
- * that the dimensions of the subregions are the same. For example, it is
- * allowed to copy a rectangular region from a 2D image to a slice of a 3D
- * image.
- *
- * If the source and destination image data overlap, or the combination of
- * offset and range references an out-of-bounds element in any of the images,
- * the behavior is undefined.
- *
- * @param[in] agent Agent associated with both the source and destination image handles.
- *
- * @param[in] src_image Image handle of source image. The agent associated with the source
- * image handle must be identical to that of the destination image.
- *
- * @param[in] src_offset Pointer to the offset within the source image where to
- * copy the data from. Must not be NULL.
- *
- * @param[in] dst_image Image handle of destination image.
- *
- * @param[in] dst_offset Pointer to the offset within the destination
- * image where to copy the data. Must not be NULL.
- *
- * @param[in] range Dimensions of the image portion to be copied. The HSA
- * runtime computes the size of the image data to be copied using this
- * argument. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_offset is
- * NULL, @p dst_offset is NULL, or @p range is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_copy(
-    hsa_agent_t agent,
-    hsa_ext_image_t src_image,
-    const hsa_dim3_t* src_offset,
-    hsa_ext_image_t dst_image,
-    const hsa_dim3_t* dst_offset,
-    const hsa_dim3_t* range);
-
-/**
- * @brief Image region.
- */
-typedef struct hsa_ext_image_region_s {
-   /**
-    * Offset within an image (in coordinates).
-    */
-    hsa_dim3_t offset;
-
-   /**
-    * Dimension size of the image range (in coordinates). The x, y, and z dimensions
-    * correspond to width, height, and depth or index respectively.
-    */
-    hsa_dim3_t range;
-} hsa_ext_image_region_t;
-
-/**
- * @brief Import linearly organized image data from memory directly to an
- * image handle.
- *
- * @details This operation updates the image data referenced by the image handle
- * from the source memory. The size of the data imported from memory is
- * implicitly derived from the image region.
- *
- * It is the application's responsibility to avoid out of bounds memory access.
- *
- * None of the source memory or destination image data memory can
- * overlap. Overlapping of any of the source and destination image
- * data memory within the import operation produces undefined results.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] src_memory Source memory. Must not be NULL.
- *
- * @param[in] src_row_pitch The size in bytes of a single row of the image in the
- * source memory. If the value is smaller than the destination image region
- * width * image element byte size, then region width * image element byte
- * size is used.
- *
- * @param[in] src_slice_pitch The size in bytes of a single 2D slice of a 3D image,
- * or the size in bytes of each image layer in an image array in the source memory.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_1DA and the value is smaller than the
- * value used for @p src_row_pitch, then the value used for @p src_row_pitch is used.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH and the value is smaller than the value used for
- * @p src_row_pitch * destination image region height, then the value used for
- * @p src_row_pitch * destination image region height is used.
- * Otherwise, the value is not used.
- *
- * @param[in] dst_image Image handle of destination image.
- *
- * @param[in] image_region Pointer to the image region to be updated. Must not
- * be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_memory is NULL, or @p
- * image_region is NULL.
- *
- */
-hsa_status_t HSA_API hsa_ext_image_import(
-    hsa_agent_t agent,
-    const void *src_memory,
-    size_t src_row_pitch,
-    size_t src_slice_pitch,
-    hsa_ext_image_t dst_image,
-    const hsa_ext_image_region_t *image_region);
-
-/**
- * @brief Export the image data to linearly organized memory.
- *
- * @details The operation updates the destination memory with the image data of
- * @p src_image. The size of the data exported to memory is implicitly derived
- * from the image region.
- *
- * It is the application's responsibility to avoid out of bounds memory access.
- *
- * None of the destination memory or source image data memory can
- * overlap. Overlapping of any of the source and destination image
- * data memory within the export operation produces undefined results.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] src_image Image handle of source image.
- *
- * @param[in] dst_memory Destination memory. Must not be NULL.
- *
- * @param[in] dst_row_pitch The size in bytes of a single row of the image in the
- * destination memory. If the value is smaller than the source image region
- * width * image element byte size, then region width * image element byte
- * size is used.
- *
- * @param[in] dst_slice_pitch The size in bytes of a single 2D slice of a 3D image,
- * or the size in bytes of each image in an image array in the destination memory.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_1DA and the value is smaller than the
- * value used for @p dst_row_pitch, then the value used for @p dst_row_pitch is used.
- * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
- * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH and the value is smaller than the value used for
- * @p dst_row_pitch * source image region height, then the value used for
- * @p dst_row_pitch * source image region height is used.
- * Otherwise, the value is not used.
- *
- * @param[in] image_region Pointer to the image region to be exported. Must not
- * be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p dst_memory is NULL, or @p
- * image_region is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_export(
-    hsa_agent_t agent,
-    hsa_ext_image_t src_image,
-    void *dst_memory,
-    size_t dst_row_pitch,
-    size_t dst_slice_pitch,
-    const hsa_ext_image_region_t *image_region);
-
-/**
- * @brief Clear a region of an image so that every image element has
- * the specified value.
- *
- * @param[in] agent Agent associated with the image handle.
- *
- * @param[in] image Image handle for image to be cleared.
- *
- * @param[in] data The value to which to set each image element being
- * cleared. It is specified as an array of image component values. The
- * number of array elements must match the number of access components
- * for the image channel order. The type of each array element must
- * match the image access type of the image channel type. When the
- * value is used to set the value of an image element, the conversion
- * method corresponding to the image channel type is used. See the
- * <em>Channel Order</em> section and <em>Channel Type</em> section in
- * the <em>HSA Programming Reference Manual</em> for more
- * information. Must not be NULL.
- *
- * @param[in] image_region Pointer to the image region to clear. Must not be
- * NULL. If the region references an out-of-bounds element, the behavior is
- * undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p data is NULL, or @p
- * image_region is NULL.
- */
-hsa_status_t HSA_API hsa_ext_image_clear(
-    hsa_agent_t agent,
-    hsa_ext_image_t image,
-    const void* data,
-    const hsa_ext_image_region_t *image_region);
-
-/**
- * @brief Sampler handle. Samplers are populated by
- * ::hsa_ext_sampler_create. Sampler handles are only unique within an
- * agent, not across agents.
- */
-typedef struct hsa_ext_sampler_s {
-  /**
-   *  Opaque handle. For a given agent, two handles reference the same object of
-   *  the enclosing type if and only if they are equal.
-   */
-    uint64_t handle;
-} hsa_ext_sampler_t;
-
-/**
- * @brief Sampler address modes. The sampler address mode describes
- * the processing of out-of-range image coordinates. See the
- * <em>Addressing Mode</em> section in the <em>HSA Programming Reference
- * Manual</em> for definitions on each address mode. The values
- * match the BRIG type @p hsa_ext_brig_sampler_addressing_t.
- */
-typedef enum {
-  /**
-   * Out-of-range coordinates are not handled.
-   */
-  HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED = 0,
-
-  /**
-   * Clamp out-of-range coordinates to the image edge.
-   */
-  HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 1,
-
-  /**
-   * Clamp out-of-range coordinates to the image border color.
-   */
-  HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER = 2,
-
-  /**
-   * Wrap out-of-range coordinates back into the valid coordinate
-   * range so the image appears as repeated tiles.
-   */
-  HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT = 3,
-
-  /**
-   * Mirror out-of-range coordinates back into the valid coordinate
-   * range so the image appears as repeated tiles with every other
-   * tile a reflection.
-   */
-  HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 4
-
-} hsa_ext_sampler_addressing_mode_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_sampler_addressing_mode_t constants.
- */
-typedef uint32_t hsa_ext_sampler_addressing_mode32_t;
-
-/**
- * @brief Sampler coordinate normalization modes. See the
- * <em>Coordinate Normalization Mode</em> section in the <em>HSA
- * Programming Reference Manual</em> for definitions on each
- * coordinate normalization mode. The values match the BRIG type @p
- * hsa_ext_brig_sampler_coord_normalization_t.
- */
-typedef enum {
-
-  /**
-   * Coordinates are used to directly address an image element.
-   */
-  HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED = 0,
-
-  /**
-   * Coordinates are scaled by the image dimension size before being
-   * used to address an image element.
-   */
-  HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED = 1
-
-} hsa_ext_sampler_coordinate_mode_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_sampler_coordinate_mode_t constants.
- */
-typedef uint32_t hsa_ext_sampler_coordinate_mode32_t;
-    
-
-/**
- * @brief Sampler filter modes. See the <em>Filter Mode</em> section
- * in the <em>HSA Programming Reference Manual</em> for definitions
- * on each filter mode. The enumeration values match the BRIG type @p
- * hsa_ext_brig_sampler_filter_t.
- */
-typedef enum {
-  /**
-   * Filter to the image element nearest (in Manhattan distance) to the
-   * specified coordinate.
-   */
-  HSA_EXT_SAMPLER_FILTER_MODE_NEAREST = 0,
-
-  /**
-   * Filter to the image element calculated by combining the elements in a 2x2
-   * square block or 2x2x2 cube block around the specified coordinate. The
-   * elements are combined using linear interpolation.
-   */
-  HSA_EXT_SAMPLER_FILTER_MODE_LINEAR = 1
-
-} hsa_ext_sampler_filter_mode_t;
-
-/**
- * @brief A fixed-size type used to represent ::hsa_ext_sampler_filter_mode_t constants.
- */
-typedef uint32_t hsa_ext_sampler_filter_mode32_t;
-
-/**
- * @brief Implementation independent sampler descriptor.
- */
-typedef struct hsa_ext_sampler_descriptor_s {
-  /**
-   * Sampler coordinate mode describes the normalization of image coordinates.
-   */
-  hsa_ext_sampler_coordinate_mode32_t coordinate_mode;
-
-  /**
-   * Sampler filter type describes the type of sampling performed.
-   */
-  hsa_ext_sampler_filter_mode32_t filter_mode;
-
-  /**
-   * Sampler address mode describes the processing of out-of-range image
-   * coordinates.
-   */
-  hsa_ext_sampler_addressing_mode32_t address_mode;
-
-} hsa_ext_sampler_descriptor_t;
-
-/**
- * @brief Create an agent specific sampler handle for a given agent
- * independent sampler descriptor and agent.
- *
- * @param[in] agent Agent to be associated with the sampler handle created.
- *
- * @param[in] sampler_descriptor Pointer to a sampler descriptor. Must not be
- * NULL.
- *
- * @param[out] sampler Memory location where the HSA runtime stores the newly
- * created sampler handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- *
- * @retval ::HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED The
- * @p agent does not have the capability to support the properties
- * specified by @p sampler_descriptor or it is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
- * the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p sampler_descriptor is NULL, or
- * @p sampler is NULL.
- */
-hsa_status_t HSA_API hsa_ext_sampler_create(
-    hsa_agent_t agent,
-    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
-    hsa_ext_sampler_t *sampler);
-
-/**
- * @brief Destroy a sampler handle previously created using ::hsa_ext_sampler_create.
- *
- * @details The sampler handle should not be destroyed while there are
- * references to it queued for execution or currently being used in a
- * kernel dispatch.
- *
- * @param[in] agent Agent associated with the sampler handle.
- *
- * @param[in] sampler Sampler handle to destroy.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
- */
-hsa_status_t HSA_API hsa_ext_sampler_destroy(
-    hsa_agent_t agent,
-    hsa_ext_sampler_t sampler);
-
-
-#define hsa_ext_images_1_00
-
-/**
- * @brief The function pointer table for the images v1.00 extension. Can be returned by ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
- */
-typedef struct hsa_ext_images_1_00_pfn_s {
-
-  hsa_status_t (*hsa_ext_image_get_capability)(
-    hsa_agent_t agent,
-    hsa_ext_image_geometry_t geometry,
-    const hsa_ext_image_format_t *image_format,
-    uint32_t *capability_mask);
-
-  hsa_status_t (*hsa_ext_image_data_get_info)(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_data_info_t *image_data_info);
-
-  hsa_status_t (*hsa_ext_image_create)(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    const void *image_data,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_t *image);
-
-  hsa_status_t (*hsa_ext_image_destroy)(
-    hsa_agent_t agent,
-    hsa_ext_image_t image);
-
-  hsa_status_t (*hsa_ext_image_copy)(
-    hsa_agent_t agent,
-    hsa_ext_image_t src_image,
-    const hsa_dim3_t* src_offset,
-    hsa_ext_image_t dst_image,
-    const hsa_dim3_t* dst_offset,
-    const hsa_dim3_t* range);
-
-  hsa_status_t (*hsa_ext_image_import)(
-    hsa_agent_t agent,
-    const void *src_memory,
-    size_t src_row_pitch,
-    size_t src_slice_pitch,
-    hsa_ext_image_t dst_image,
-    const hsa_ext_image_region_t *image_region);
-
-  hsa_status_t (*hsa_ext_image_export)(
-    hsa_agent_t agent,
-    hsa_ext_image_t src_image,
-    void *dst_memory,
-    size_t dst_row_pitch,
-    size_t dst_slice_pitch,
-    const hsa_ext_image_region_t *image_region);
-
-  hsa_status_t (*hsa_ext_image_clear)(
-    hsa_agent_t agent,
-    hsa_ext_image_t image,
-    const void* data,
-    const hsa_ext_image_region_t *image_region);
-
-  hsa_status_t (*hsa_ext_sampler_create)(
-    hsa_agent_t agent,
-    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
-    hsa_ext_sampler_t *sampler);
-
-  hsa_status_t (*hsa_ext_sampler_destroy)(
-    hsa_agent_t agent,
-    hsa_ext_sampler_t sampler);
-
-} hsa_ext_images_1_00_pfn_t;
-
-#define hsa_ext_images_1
-
-/**
- * @brief The function pointer table for the images v1 extension. Can be returned by ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
- */
-typedef struct hsa_ext_images_1_pfn_s {
-
-  hsa_status_t (*hsa_ext_image_get_capability)(
-    hsa_agent_t agent,
-    hsa_ext_image_geometry_t geometry,
-    const hsa_ext_image_format_t *image_format,
-    uint32_t *capability_mask);
-
-  hsa_status_t (*hsa_ext_image_data_get_info)(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_data_info_t *image_data_info);
-
-  hsa_status_t (*hsa_ext_image_create)(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    const void *image_data,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_t *image);
-
-  hsa_status_t (*hsa_ext_image_destroy)(
-    hsa_agent_t agent,
-    hsa_ext_image_t image);
-
-  hsa_status_t (*hsa_ext_image_copy)(
-    hsa_agent_t agent,
-    hsa_ext_image_t src_image,
-    const hsa_dim3_t* src_offset,
-    hsa_ext_image_t dst_image,
-    const hsa_dim3_t* dst_offset,
-    const hsa_dim3_t* range);
-
-  hsa_status_t (*hsa_ext_image_import)(
-    hsa_agent_t agent,
-    const void *src_memory,
-    size_t src_row_pitch,
-    size_t src_slice_pitch,
-    hsa_ext_image_t dst_image,
-    const hsa_ext_image_region_t *image_region);
-
-  hsa_status_t (*hsa_ext_image_export)(
-    hsa_agent_t agent,
-    hsa_ext_image_t src_image,
-    void *dst_memory,
-    size_t dst_row_pitch,
-    size_t dst_slice_pitch,
-    const hsa_ext_image_region_t *image_region);
-
-  hsa_status_t (*hsa_ext_image_clear)(
-    hsa_agent_t agent,
-    hsa_ext_image_t image,
-    const void* data,
-    const hsa_ext_image_region_t *image_region);
-
-  hsa_status_t (*hsa_ext_sampler_create)(
-    hsa_agent_t agent,
-    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
-    hsa_ext_sampler_t *sampler);
-
-  hsa_status_t (*hsa_ext_sampler_destroy)(
-    hsa_agent_t agent,
-    hsa_ext_sampler_t sampler);
-
-  hsa_status_t (*hsa_ext_image_get_capability_with_layout)(
-    hsa_agent_t agent,
-    hsa_ext_image_geometry_t geometry,
-    const hsa_ext_image_format_t *image_format,
-    hsa_ext_image_data_layout_t image_data_layout,
-    uint32_t *capability_mask);
-
-  hsa_status_t (*hsa_ext_image_data_get_info_with_layout)(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_data_layout_t image_data_layout,
-    size_t image_data_row_pitch,
-    size_t image_data_slice_pitch,
-    hsa_ext_image_data_info_t *image_data_info);
-
-  hsa_status_t (*hsa_ext_image_create_with_layout)(
-    hsa_agent_t agent,
-    const hsa_ext_image_descriptor_t *image_descriptor,
-    const void *image_data,
-    hsa_access_permission_t access_permission,
-    hsa_ext_image_data_layout_t image_data_layout,
-    size_t image_data_row_pitch,
-    size_t image_data_slice_pitch,
-    hsa_ext_image_t *image);
-
-} hsa_ext_images_1_pfn_t;
-/** @} */
-    
-#ifdef __cplusplus
-}  // end extern "C" block
-#endif /*__cplusplus*/ 
-
-#endif
diff --git a/third_party/rocm/include/hsa/hsa_ven_amd_aqlprofile.h b/third_party/rocm/include/hsa/hsa_ven_amd_aqlprofile.h
deleted file mode 100644
index fb763c0..0000000
--- a/third_party/rocm/include/hsa/hsa_ven_amd_aqlprofile.h
+++ /dev/null
@@ -1,355 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-// 
-// Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
-// 
-// Developed by:
-// 
-//                 AMD Research and AMD HSA Software Development
-// 
-//                 Advanced Micro Devices, Inc.
-// 
-//                 www.amd.com
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-// 
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
-#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
-
-#include <stdint.h>
-#include "hsa.h"
-
-#define HSA_AQLPROFILE_VERSION_MAJOR 2
-#define HSA_AQLPROFILE_VERSION_MINOR 0
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-////////////////////////////////////////////////////////////////////////////////
-// Library version
-uint32_t hsa_ven_amd_aqlprofile_version_major();
-uint32_t hsa_ven_amd_aqlprofile_version_minor();
-
-///////////////////////////////////////////////////////////////////////
-// Library API:
-// The library provides helper methods for instantiation of
-// the profile context object and for populating of the start
-// and stop AQL packets. The profile object contains a profiling
-// events list and needed for profiling buffers descriptors,
-// a command buffer and an output data buffer. To check if there
-// was an error the library methods return a status code. Also
-// the library provides methods for querying required buffers
-// attributes, to validate the event attributes and to get profiling
-// output data.
-//
-// Returned status:
-//     hsa_status_t – HSA status codes are used from hsa.h header
-//
-// Supported profiling features:
-//
-// Supported profiling events
-typedef enum {
-  HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0,
-  HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1,
-} hsa_ven_amd_aqlprofile_event_type_t;
-
-// Supported performance counters (PMC) blocks
-// The block ID is the same for a block instances set, for example
-// each block instance from the TCC block set, TCC0, TCC1, …, TCCN
-// will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCKS_TCC.
-typedef enum {
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14,
-  // Memory related blocks
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23,
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24,
-  // System blocks
-  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25,
-
-  HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
-} hsa_ven_amd_aqlprofile_block_name_t;
-
-// PMC event object structure
-// ‘counter_id’ value is specified in GFXIPs perfcounter user guides
-// which is the counters select value, “Performance Counters Selection”
-// chapter.
-typedef struct {
-  hsa_ven_amd_aqlprofile_block_name_t block_name;
-  uint32_t block_index;
-  uint32_t counter_id;
-} hsa_ven_amd_aqlprofile_event_t;
-
-// Check if event is valid for the specific GPU
-hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
-    hsa_agent_t agent,                            // HSA handle for the profiling GPU
-    const hsa_ven_amd_aqlprofile_event_t* event,  // [in] Pointer on validated event
-    bool* result);                                // [out] True if the event valid, False otherwise
-
-// Profiling parameters
-// All parameters are generic and if not applicable for a specific
-// profile configuration then error status will be returned.
-typedef enum {
-  // Trace applicable parameters
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
-  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
-} hsa_ven_amd_aqlprofile_parameter_name_t;
-
-// Profile parameter object
-typedef struct {
-  hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
-  uint32_t value;
-} hsa_ven_amd_aqlprofile_parameter_t;
-
-//
-// Profile context object:
-// The library provides a profile object structure which contains
-// the events array, a buffer for the profiling start/stop commands
-// and a buffer for the output data.
-// The buffers are specified by the buffer descriptors and allocated
-// by the application. The buffers allocation attributes, the command
-// buffer size, the PMC output buffer size as well as profiling output
-// data can be get using the generic get profile info helper _get_info.
-//
-// Buffer descriptor
-typedef struct {
-  void* ptr;
-  uint32_t size;
-} hsa_ven_amd_aqlprofile_descriptor_t;
-
-// Profile context object structure, contains profiling events list and
-// needed for profiling buffers descriptors, a command buffer and
-// an output data buffer
-typedef struct {
-  hsa_agent_t agent;                                     // GFXIP handle
-  hsa_ven_amd_aqlprofile_event_type_t type;              // Events type
-  const hsa_ven_amd_aqlprofile_event_t* events;          // Events array
-  uint32_t event_count;                                  // Events count
-  const hsa_ven_amd_aqlprofile_parameter_t* parameters;  // Parameters array
-  uint32_t parameter_count;                              // Parameters count
-  hsa_ven_amd_aqlprofile_descriptor_t output_buffer;     // Output buffer
-  hsa_ven_amd_aqlprofile_descriptor_t command_buffer;    // PM4 commands
-} hsa_ven_amd_aqlprofile_profile_t;
-
-//
-// AQL packets populating methods:
-// The helper methods to populate provided by the application START and
-// STOP AQL packets which the application is required to submit before and
-// after profiled GPU task packets respectively.
-//
-// AQL Vendor Specific packet which carries a PM4 command
-typedef struct {
-  uint16_t header;
-  uint16_t pm4_command[27];
-  hsa_signal_t completion_signal;
-} hsa_ext_amd_aql_pm4_packet_t;
-
-// Method to populate the provided AQL packet with profiling start commands
-// Only 'pm4_command' fields of the packet are set and the application
-// is responsible to set Vendor Specific header type a completion signal
-hsa_status_t hsa_ven_amd_aqlprofile_start(
-    hsa_ven_amd_aqlprofile_profile_t* profile,        // [in/out] profile contex object
-    hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);  // [out] profile start AQL packet
-
-// Method to populate the provided AQL packet with profiling stop commands
-// Only 'pm4_command' fields of the packet are set and the application
-// is responsible to set Vendor Specific header type and a completion signal
-hsa_status_t hsa_ven_amd_aqlprofile_stop(
-    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile contex object
-    hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);   // [out] profile stop AQL packet
-
-// Method to populate the provided AQL packet with profiling read commands
-// Only 'pm4_command' fields of the packet are set and the application
-// is responsible to set Vendor Specific header type and a completion signal
-hsa_status_t hsa_ven_amd_aqlprofile_read(
-    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile contex object
-    hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);   // [out] profile stop AQL packet
-
-// Legacy devices, PM4 profiling packet size
-const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
-// Legacy devices, converting the profiling AQL packet to PM4 packet blob
-hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
-    const hsa_ext_amd_aql_pm4_packet_t* aql_packet,  // [in] AQL packet
-    void* data);                                     // [out] PM4 packet blob
-
-//
-// Get profile info:
-// Generic method for getting various profile info including profile buffers
-// attributes like the command buffer size and the profiling PMC results.
-// It’s implied that all counters are 64bit values.
-//
-// Profile generic output data:
-typedef struct {
-  uint32_t sample_id;  // PMC sample or trace buffer index
-  union {
-    struct {
-      hsa_ven_amd_aqlprofile_event_t event;  // PMC event
-      uint64_t result;                       // PMC result
-    } pmc_data;
-    hsa_ven_amd_aqlprofile_descriptor_t trace_data;  // Trace output data descriptor
-  };
-} hsa_ven_amd_aqlprofile_info_data_t;
-
-// ID query type
-typedef struct {
-  const char* name;
-  uint32_t id;
-  uint32_t instance_count;
-} hsa_ven_amd_aqlprofile_id_query_t;
-
-// Profile attributes
-typedef enum {
-  HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0,  // get_info returns uint32_t value
-  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1,        // get_info returns uint32_t value
-  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2,             // get_info returns PMC uint64_t value
-                                                        // in info_data object
-  HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3,           // get_info returns trace buffer ptr/size
-                                                        // in info_data object
-                                                        //
-  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4,       // get_info returns number of block counter
-  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5,             // get_info returns block id, instances
-                                                        // by name string using _id_query_t
-                                                        //
-  HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6,           // get_info returns size/pointer for
-                                                        // counters enable command buffer
-  HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7,          // get_info returns size/pointer for
-                                                        // counters disable command buffer
-} hsa_ven_amd_aqlprofile_info_type_t;
-
-// Definition of output data iterator callback
-typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
-    hsa_ven_amd_aqlprofile_info_type_t info_type,   // [in] data type, PMC or trace data
-    hsa_ven_amd_aqlprofile_info_data_t* info_data,  // [in] info_data object
-    void* callback_data);                           // [in/out] data passed to the callback
-
-// Method for getting the profile info
-hsa_status_t hsa_ven_amd_aqlprofile_get_info(
-    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
-    hsa_ven_amd_aqlprofile_info_type_t attribute,     // [in] requested profile attribute
-    void* value);                                     // [in/out] returned value
-
-// Method for iterating the events output data
-hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
-    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
-    hsa_ven_amd_aqlprofile_data_callback_t callback,  // [in] callback to iterate the output data
-    void* data);                                      // [in/out] data passed to the callback
-
-// Return error string
-hsa_status_t hsa_ven_amd_aqlprofile_error_string(
-    const char** str);  // [out] pointer on the error string
-
-/**
- * @brief Extension version.
- */
-#define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
-#define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"
-
-#ifdef HSA_LARGE_MODEL
-static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
-#else
-static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
-#endif
-
-/**
- * @brief Extension function table.
- */
-typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
-  uint32_t (*hsa_ven_amd_aqlprofile_version_major)();
-  uint32_t (*hsa_ven_amd_aqlprofile_version_minor)();
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
-      const char** str);
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
-      hsa_agent_t agent,
-      const hsa_ven_amd_aqlprofile_event_t* event,
-      bool* result);
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
-      hsa_ven_amd_aqlprofile_profile_t* profile,
-      hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
-      const hsa_ven_amd_aqlprofile_profile_t* profile,
-      hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
-      const hsa_ven_amd_aqlprofile_profile_t* profile,
-      hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
-      const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
-      void* data);
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
-      const hsa_ven_amd_aqlprofile_profile_t* profile,
-      hsa_ven_amd_aqlprofile_info_type_t attribute,
-      void* value);
-
-  hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
-      const hsa_ven_amd_aqlprofile_profile_t* profile,
-      hsa_ven_amd_aqlprofile_data_callback_t callback,
-      void* data);
-} hsa_ven_amd_aqlprofile_1_00_pfn_t;
-
-typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;
-
-#ifdef __cplusplus
-}
-#endif  // __cplusplus
-
-#endif  // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
diff --git a/third_party/rocm/include/hsa/hsa_ven_amd_loader.h b/third_party/rocm/include/hsa/hsa_ven_amd_loader.h
deleted file mode 100644
index 3ce8475..0000000
--- a/third_party/rocm/include/hsa/hsa_ven_amd_loader.h
+++ /dev/null
@@ -1,589 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// The University of Illinois/NCSA
-// Open Source License (NCSA)
-//
-// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
-//
-// Developed by:
-//
-//                 AMD Research and AMD HSA Software Development
-//
-//                 Advanced Micro Devices, Inc.
-//
-//                 www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal with the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-//  - Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimers.
-//  - Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimers in
-//    the documentation and/or other materials provided with the distribution.
-//  - Neither the names of Advanced Micro Devices, Inc,
-//    nor the names of its contributors may be used to endorse or promote
-//    products derived from this Software without specific prior written
-//    permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-// DEALINGS WITH THE SOFTWARE.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// HSA AMD extension for additional loader functionality.
-
-#ifndef HSA_VEN_AMD_LOADER_H
-#define HSA_VEN_AMD_LOADER_H
-
-#include "hsa.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @brief Queries equivalent host address for given @p device_address, and
- * records it in @p host_address.
- *
- *
- * @details Contents of memory pointed to by @p host_address would be identical
- * to contents of memory pointed to by @p device_address. Only difference
- * between the two is host accessibility: @p host_address is always accessible
- * from host, @p device_address might not be accessible from host.
- *
- * If @p device_address already points to host accessible memory, then the value
- * of @p device_address is simply copied into @p host_address.
- *
- * The lifetime of @p host_address is the same as the lifetime of @p
- * device_address, and both lifetimes are limited by the lifetime of the
- * executable that is managing these addresses.
- *
- *
- * @param[in] device_address Device address to query equivalent host address
- * for.
- *
- * @param[out] host_address Pointer to application-allocated buffer to record
- * queried equivalent host address in.
- *
- *
- * @retval HSA_STATUS_SUCCESS Function is executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
- * null, or @p host_address is null.
- */
-hsa_status_t hsa_ven_amd_loader_query_host_address(
-  const void *device_address,
-  const void **host_address);
-
-/**
- * @brief The storage type of the code object that is backing loaded memory
- * segment.
- */
-typedef enum {
-  /**
-   * Loaded memory segment is not backed by any code object (anonymous), as the
-   * case would be with BSS (uninitialized data).
-   */
-  HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
-  /**
-   * Loaded memory segment is backed by the code object that is stored in the
-   * file.
-   */
-  HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
-  /**
-   * Loaded memory segment is backed by the code object that is stored in the
-   * memory.
-   */
-  HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
-} hsa_ven_amd_loader_code_object_storage_type_t;
-
-/**
- * @brief Loaded memory segment descriptor.
- *
- *
- * @details Loaded memory segment descriptor describes underlying loaded memory
- * segment. Loaded memory segment is created/allocated by the executable during
- * the loading of the code object that is backing underlying memory segment.
- *
- * The lifetime of underlying memory segment is limited by the lifetime of the
- * executable that is managing underlying memory segment.
- */
-typedef struct hsa_ven_amd_loader_segment_descriptor_s {
-  /**
-   * Agent underlying memory segment is allocated on. If the code object that is
-   * backing underlying memory segment is program code object, then 0.
-   */
-  hsa_agent_t agent;
-  /**
-   * Executable that is managing this underlying memory segment.
-   */
-  hsa_executable_t executable;
-  /**
-   * Storage type of the code object that is backing underlying memory segment.
-   */
-  hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
-  /**
-   * If the storage type of the code object that is backing underlying memory
-   * segment is:
-   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
-   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
-   *     filepath to the code object;
-   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
-   *     accessible pointer to the first byte of the code object.
-   */
-  const void *code_object_storage_base;
-  /**
-   * If the storage type of the code object that is backing underlying memory
-   * segment is:
-   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
-   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
-   *     the filepath to the code object (including null-terminating character);
-   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
-   *     bytes, of the memory occupied by the code object.
-   */
-  size_t code_object_storage_size;
-  /**
-   * If the storage type of the code object that is backing underlying memory
-   * segment is:
-   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
-   *   - other, then offset, in bytes, from the beginning of the code object to
-   *     the first byte in the code object data is copied from.
-   */
-  size_t code_object_storage_offset;
-  /**
-   * Starting address of the underlying memory segment.
-   */
-  const void *segment_base;
-  /**
-   * Size, in bytes, of the underlying memory segment.
-   */
-  size_t segment_size;
-} hsa_ven_amd_loader_segment_descriptor_t;
-
-/**
- * @brief Either queries loaded memory segment descriptors, or total number of
- * loaded memory segment descriptors.
- *
- *
- * @details If @p segment_descriptors is not null and @p num_segment_descriptors
- * points to number that exactly matches total number of loaded memory segment
- * descriptors, then queries loaded memory segment descriptors, and records them
- * in @p segment_descriptors. If @p segment_descriptors is null and @p
- * num_segment_descriptors points to zero, then queries total number of loaded
- * memory segment descriptors, and records it in @p num_segment_descriptors. In
- * all other cases returns appropriate error code (see below).
- *
- * The caller of this function is responsible for the allocation/deallocation
- * and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
- *
- * The lifetime of loaded memory segments that are described by queried loaded
- * memory segment descriptors is limited by the lifetime of the executable that
- * is managing loaded memory segments.
- *
- * Queried loaded memory segment descriptors are always self-consistent: they
- * describe a complete set of loaded memory segments that are being backed by
- * fully loaded code objects that are present at the time (i.e. this function
- * is blocked until all executable manipulations are fully complete).
- *
- *
- * @param[out] segment_descriptors Pointer to application-allocated buffer to
- * record queried loaded memory segment descriptors in. Can be null if @p
- * num_segment_descriptors points to zero.
- *
- * @param[in,out] num_segment_descriptors Pointer to application-allocated
- * buffer that contains either total number of loaded memory segment descriptors
- * or zero.
- *
- *
- * @retval HSA_STATUS_SUCCESS Function is executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
- * while @p num_segment_descriptors points to non-zero number, @p
- * segment_descriptors is not null while @p num_segment_descriptors points to
- * zero, or @p num_segment_descriptors is null.
- *
- * @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
- * does not point to number that exactly matches total number of loaded memory
- * segment descriptors.
- */
-hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
-  hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
-  size_t *num_segment_descriptors);
-
-/**
- * @brief Obtains the handle of executable to which the device address belongs.
- *
- * @details This method should not be used to obtain executable handle by using
- * a host address. The executable returned is expected to be alive until its
- * destroyed by the user.
- *
- * @retval HSA_STATUS_SUCCESS Function is executed successfully.
- *
- * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
- *
- * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
- * is no exectuable found for this kernel code object.
- */
-hsa_status_t hsa_ven_amd_loader_query_executable(
-  const void *device_address,
-  hsa_executable_t *executable);
-
-//===----------------------------------------------------------------------===//
-
-/**
- * @brief Iterate over the loaded code objects in an executable, and invoke
- * an application-defined callback on every iteration.
- *
- * @param[in] executable Executable.
- *
- * @param[in] callback Callback to be invoked once per loaded code object. The
- * HSA runtime passes three arguments to the callback: the executable, a
- * loaded code object, and the application data. If @p callback returns a
- * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
- * traversal stops and
- * ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
- * status value.
- *
- * @param[in] data Application data that is passed to @p callback on every
- * iteration. May be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
- */
-hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
-  hsa_executable_t executable,
-  hsa_status_t (*callback)(
-    hsa_executable_t executable,
-    hsa_loaded_code_object_t loaded_code_object,
-    void *data),
-  void *data);
-
-/**
- * @brief Loaded code object kind.
- */
-typedef enum {
-  /**
-   * Program code object.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
-  /**
-   * Agent code object.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
-} hsa_ven_amd_loader_loaded_code_object_kind_t;
-
-/**
- * @brief Loaded code object attributes.
- */
-typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
-  /**
-   * The executable in which this loaded code object is loaded. The
-   * type of this attribute is ::hsa_executable_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
-  /**
-   * The kind of this loaded code object. The type of this attribute is
-   * ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
-  /**
-   * The agent on which this loaded code object is loaded. The
-   * value of this attribute is only defined if
-   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
-   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
-   * attribute is ::hsa_agent_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
-  /**
-   * The storage type of the code object reader used to load the loaded code object.
-   * The type of this attribute is ::uint32_t interpreted as a
-   * ::hsa_ven_amd_loader_code_object_storage_type_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
-  /**
-   * The memory address of the first byte of the code object that was loaaded.
-   * The value of this attribute is only defined if
-   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
-   * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
-   * attribute is ::uint64_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
-  /**
-   * The memory size in bytes of the code object that was loaaded.
-   * The value of this attribute is only defined if
-   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
-   * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
-   * attribute is ::uint64_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
-  /**
-   * The file descriptor of the code object that was loaaded.
-   * The value of this attribute is only defined if
-   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
-   * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
-   * attribute is ::int.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
-  /**
-   * The signed byte address difference of the memory address at which the code
-   * object is loaded minus the virtual address specified in the code object
-   * that is loaded. The value of this attribute is only defined if the
-   * executable in which the code object is loaded is froozen. The type of this
-   * attribute is ::int64_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
-  /**
-   * The base memory address at which the code object is loaded. This is the
-   * base address of the allocation for the lowest addressed segment of the code
-   * object that is loaded. Note that any non-loaded segments before the first
-   * loaded segment are ignored. The value of this attribute is only defined if
-   * the executable in which the code object is loaded is froozen. The type of
-   * this attribute is ::uint64_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
-  /**
-   * The byte size of the loaded code objects contiguous memory allocation. The
-   * value of this attribute is only defined if the executable in which the code
-   * object is loaded is froozen. The type of this attribute is ::uint64_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
-  /**
-   * The length of the URI in bytes, not including the NUL terminator. The type
-   * of this attribute is uint32_t.
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
-  /**
-   * The URI name from which the code object was loaded. The type of this
-   * attribute is a NUL terminated \p char* with the length equal to the value
-   * of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
-   * The URI name syntax is defined by the following BNF syntax:
-   *
-   *     code_object_uri ::== file_uri | memory_uri
-   *     file_uri        ::== "file://" file_path [ range_specifier ]
-   *     memory_uri      ::== "memory://" process_id range_specifier
-   *     range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
-   *     file_path       ::== URI_ENCODED_OS_FILE_PATH
-   *     process_id      ::== DECIMAL_NUMBER
-   *     number          ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
-   *
-   * ``number`` is a C integral literal where hexadecimal values are prefixed by
-   * "0x" or "0X", and octal values by "0".
-   *
-   * ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
-   * In URI encoding, every character that is not in the regular expression
-   * ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
-   * proceeded by "%".  Directories in the path are separated by "/".
-   *
-   * ``offset`` is a 0-based byte offset to the start of the code object.  For a
-   * file URI, it is from the start of the file specified by the ``file_path``,
-   * and if omitted defaults to 0. For a memory URI, it is the memory address
-   * and is required.
-   *
-   * ``size`` is the number of bytes in the code object.  For a file URI, if
-   * omitted it defaults to the size of the file.  It is required for a memory
-   * URI.
-   *
-   * ``process_id`` is the identity of the process owning the memory.  For Linux
-   * it is the C unsigned integral decimal literal for the process ID (PID).
-   *
-   * For example:
-   *
-   *     file:///dir1/dir2/file1
-   *     file:///dir3/dir4/file2#offset=0x2000&size=3000
-   *     memory://1234#offset=0x20000&size=3000
-   */
-  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
-} hsa_ven_amd_loader_loaded_code_object_info_t;
-
-/**
- * @brief Get the current value of an attribute for a given loaded code
- * object.
- *
- * @param[in] loaded_code_object Loaded code object.
- *
- * @param[in] attribute Attribute to query.
- *
- * @param[out] value Pointer to an application-allocated buffer where to store
- * the value of the attribute. If the buffer passed by the application is not
- * large enough to hold the value of @p attribute, the behavior is undefined.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
- * invalid.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
- * loaded code object attribute, or @p value is NULL.
- */
-hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
-  hsa_loaded_code_object_t loaded_code_object,
-  hsa_ven_amd_loader_loaded_code_object_info_t attribute,
-  void *value);
-
-//===----------------------------------------------------------------------===//
-
-/**
- * @brief Create a code object reader to operate on a file with size and offset.
- *
- * @param[in] file File descriptor. The file must have been opened by
- * application with at least read permissions prior calling this function. The
- * file must contain a vendor-specific code object.
- *
- * The file is owned and managed by the application; the lifetime of the file
- * descriptor must exceed that of any associated code object reader.
- *
- * @param[in] size Size of the code object embedded in @p file.
- *
- * @param[in] offset 0-based offset relative to the beginning of the @p file
- * that denotes the beginning of the code object embedded within the @p file.
- *
- * @param[out] code_object_reader Memory location to store the newly created
- * code object reader handle. Must not be NULL.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
- * initialized.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
- * read permissions. This condition may also be reported as
- * ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
- * ::hsa_executable_load_agent_code_object function.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
- * do not form a valid code object. If file size is 0. Or offset > file size.
- * This condition may also be reported as
- * ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
- * ::hsa_executable_load_agent_code_object function.
- *
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
- * allocate the required resources.
- *
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
- */
-hsa_status_t
-hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
-    hsa_file_t file,
-    size_t offset,
-    size_t size,
-    hsa_code_object_reader_t *code_object_reader);
-
-//===----------------------------------------------------------------------===//
-
-/**
- * @brief Extension version.
- */
-#define hsa_ven_amd_loader 001002
-
-/**
- * @brief Extension function table version 1.00.
- */
-typedef struct hsa_ven_amd_loader_1_00_pfn_s {
-  hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
-    const void *device_address,
-    const void **host_address);
-
-  hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
-    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
-    size_t *num_segment_descriptors);
-
-  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
-    const void *device_address,
-    hsa_executable_t *executable);
-} hsa_ven_amd_loader_1_00_pfn_t;
-
-/**
- * @brief Extension function table version 1.01.
- */
-typedef struct hsa_ven_amd_loader_1_01_pfn_s {
-  hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
-    const void *device_address,
-    const void **host_address);
-
-  hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
-    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
-    size_t *num_segment_descriptors);
-
-  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
-    const void *device_address,
-    hsa_executable_t *executable);
-
-  hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
-    hsa_executable_t executable,
-    hsa_status_t (*callback)(
-      hsa_executable_t executable,
-      hsa_loaded_code_object_t loaded_code_object,
-      void *data),
-    void *data);
-
-  hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
-    hsa_loaded_code_object_t loaded_code_object,
-    hsa_ven_amd_loader_loaded_code_object_info_t attribute,
-    void *value);
-} hsa_ven_amd_loader_1_01_pfn_t;
-
-/**
- * @brief Extension function table version 1.02.
- */
-typedef struct hsa_ven_amd_loader_1_02_pfn_s {
-  hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
-    const void *device_address,
-    const void **host_address);
-
-  hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
-    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
-    size_t *num_segment_descriptors);
-
-  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
-    const void *device_address,
-    hsa_executable_t *executable);
-
-  hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
-    hsa_executable_t executable,
-    hsa_status_t (*callback)(
-      hsa_executable_t executable,
-      hsa_loaded_code_object_t loaded_code_object,
-      void *data),
-    void *data);
-
-  hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
-    hsa_loaded_code_object_t loaded_code_object,
-    hsa_ven_amd_loader_loaded_code_object_info_t attribute,
-    void *value);
-
-  hsa_status_t
-    (*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
-      hsa_file_t file,
-      size_t offset,
-      size_t size,
-      hsa_code_object_reader_t *code_object_reader);
-} hsa_ven_amd_loader_1_02_pfn_t;
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* HSA_VEN_AMD_LOADER_H */
diff --git a/third_party/rocm/version.txt b/third_party/rocm/version.txt
deleted file mode 100644
index 21016b3..0000000
--- a/third_party/rocm/version.txt
+++ /dev/null
@@ -1 +0,0 @@
-4.1.1-34