// runtime/bindings/python/local_dlpack.h - 3p/openxla/iree - Git at Google

 // Copied from https://github.com/dmlc/dlpack/tree/main/include/dlpack
 // at commit bbd2f4d32427e548797929af08cfe2a9cbb3cf12.
 // See Apache License: https://github.com/dmlc/dlpack/blob/main/LICENSE

 /*!
  *  Copyright (c) 2017 by Contributors
  * \file dlpack.h
  * \brief The common header of DLPack.
  */
 #ifndef DLPACK_DLPACK_H_
 #define DLPACK_DLPACK_H_

 /**
  * \brief Compatibility with C++
  *
  * DLPACK_EXTERN_C expands to `extern "C"` when this header is compiled as
  * C++ and to nothing when it is compiled as C.
  */
 #ifdef __cplusplus
 #define DLPACK_EXTERN_C extern "C"
 #else
 #define DLPACK_EXTERN_C
 #endif

 /*!
  * \brief The current major version of dlpack.
  *
  * Consumers compare this against DLPackVersion::major; a mismatch means the
  * ABI data layout differs from the one declared in this header.
  */
 #define DLPACK_MAJOR_VERSION 1

 /*!
  * \brief The current minor version of dlpack.
  *
  * Minor version updates add enumeration values (for example a new device
  * type) while the ABI is kept the same.
  */
 #define DLPACK_MINOR_VERSION 0

 /*!
  * \brief DLPACK_DLL prefix for windows.
  *
  * On _WIN32 this expands to __declspec(dllexport) when DLPACK_EXPORTS is
  * defined and to __declspec(dllimport) otherwise. On every other platform
  * it expands to nothing.
  */
 #ifdef _WIN32
 #ifdef DLPACK_EXPORTS
 #define DLPACK_DLL __declspec(dllexport)
 #else
 #define DLPACK_DLL __declspec(dllimport)
 #endif
 #else
 #define DLPACK_DLL
 #endif

 #include <stddef.h>
 #include <stdint.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

 /*!
  * \brief The DLPack version.
  *
  * A change in major version indicates that we have changed the
  * data layout of the ABI - DLManagedTensorVersioned.
  *
  * A change in minor version indicates that we have added new
  * code, such as a new device type, but the ABI is kept the same.
  *
  * If an obtained DLPack tensor has a major version that disagrees
  * with the version number specified in this header file
  * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
  * (and it is safe to do so). It is not safe to access any other fields
  * as the memory layout will have changed.
  *
  * In the case of a minor version mismatch, the tensor can be safely used as
  * long as the consumer knows how to interpret all fields. Minor version
  * updates indicate the addition of enumeration values.
  */
 typedef struct {
   /*!
    * \brief Major component of the DLPack version.
    *
    * Must equal DLPACK_MAJOR_VERSION before any field other than the deleter
    * is accessed.
    */
   uint32_t major;
   /*!
    * \brief Minor component of the DLPack version.
    *
    * Changes when enumeration values are added while the ABI stays the same.
    */
   uint32_t minor;
 } DLPackVersion;

 /*!
  * \brief The device type in DLDevice.
  */
 #ifdef __cplusplus
 typedef enum : int32_t {
 #else
 typedef enum {
 #endif
   /*! \brief Plain CPU device. */
   kDLCPU = 1,
   /*! \brief CUDA GPU device. */
   kDLCUDA = 2,
   /*! \brief Pinned CUDA CPU memory allocated by cudaMallocHost. */
   kDLCUDAHost = 3,
   /*! \brief OpenCL devices. */
   kDLOpenCL = 4,
   /* Values 5 and 6 are not assigned by this header. */
   /*! \brief Vulkan buffer for next generation graphics. */
   kDLVulkan = 7,
   /*! \brief Metal for Apple GPU. */
   kDLMetal = 8,
   /*! \brief Verilog simulator buffer. */
   kDLVPI = 9,
   /*! \brief ROCm GPUs for AMD GPUs. */
   kDLROCM = 10,
   /*! \brief Pinned ROCm CPU memory allocated by hipMallocHost. */
   kDLROCMHost = 11,
   /*!
    * \brief Reserved extension device type, meant for quickly testing an
    * extension device. Its semantics can differ depending on the
    * implementation.
    */
   kDLExtDev = 12,
   /*! \brief CUDA managed/unified memory allocated by cudaMallocManaged. */
   kDLCUDAManaged = 13,
   /*!
    * \brief Unified shared memory allocated on a oneAPI non-partitioned
    * device. A call to the oneAPI runtime is required to determine the device
    * type, the USM allocation type and the sycl context it is bound to.
    */
   kDLOneAPI = 14,
   /*! \brief GPU support for next generation WebGPU standard. */
   kDLWebGPU = 15,
   /*! \brief Qualcomm Hexagon DSP. */
   kDLHexagon = 16,
   /*! \brief Microsoft MAIA devices. */
   kDLMAIA = 17,
 } DLDeviceType;

 /*!
  * \brief A Device for Tensor and operator.
  */
 typedef struct {
   /*! \brief Kind of device, one of the DLDeviceType values. */
   DLDeviceType device_type;
   /*!
    * \brief Index of the device.
    *
    * Set to 0 for vanilla CPU memory, pinned memory, or managed memory.
    */
   int32_t device_id;
 } DLDevice;

 /*!
  * \brief The type code options DLDataType.
  */
 typedef enum {
   /*! \brief Signed integer. */
   kDLInt = 0U,
   /*! \brief Unsigned integer. */
   kDLUInt = 1U,
   /*! \brief IEEE floating point. */
   kDLFloat = 2U,
   /*!
    * \brief Opaque handle type, reserved for testing purposes.
    *
    * The exchange is only well-defined when the frameworks involved agree on
    * the handle data type.
    */
   kDLOpaqueHandle = 3U,
   /*! \brief bfloat16. */
   kDLBfloat = 4U,
   /*!
    * \brief Complex number
    * (C/C++/Python layout: compact struct per complex number).
    */
   kDLComplex = 5U,
   /*! \brief Boolean. */
   kDLBool = 6U,
 } DLDataTypeCode;

 /*!
  * \brief The data type the tensor can hold. The data type is assumed to follow
  * the native endian-ness. An explicit error message should be raised when
  * attempting to export an array with non-native endianness
  *
  *  Examples
  *   - float: type_code = 2, bits = 32, lanes = 1
  *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
  *   - int8: type_code = 0, bits = 8, lanes = 1
  *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
  *   - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
  * convention, the underlying storage size of bool is 8 bits)
  */
 typedef struct {
   /*!
    * \brief Type code of the base type.
    *
    * Stored as uint8_t rather than DLDataTypeCode for minimal memory
    * footprint; the value should still be one of the DLDataTypeCode
    * enumerators.
    */
   uint8_t code;
   /*! \brief Number of bits; common choices are 8, 16, 32. */
   uint8_t bits;
   /*! \brief Number of lanes in the type, used for vector types. */
   uint16_t lanes;
 } DLDataType;

 /*!
  * \brief Plain C Tensor object, does not manage memory.
  */
 typedef struct {
   /*!
    * \brief Pointer to the allocated data.
    *
    * This will be a CUDA device pointer or a cl_mem handle in OpenCL, and it
    * may be opaque on some device types. The pointer is always aligned to 256
    * bytes as in CUDA; the `byte_offset` field should be used to point to the
    * beginning of the data.
    *
    * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
    * TVM, perhaps others) do not adhere to this 256 byte alignment
    * requirement on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must
    * be fixed (after which this note will be updated); at the moment it is
    * recommended to not rely on the data pointer being correctly aligned.
    *
    * The size of the memory required to store the contents of a given
    * DLTensor is calculated as follows:
    *
    * \code{.c}
    * static inline size_t GetDataSize(const DLTensor* t) {
    *   size_t size = 1;
    *   for (int32_t i = 0; i < t->ndim; ++i) {
    *     size *= t->shape[i];
    *   }
    *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
    *   return size;
    * }
    * \endcode
    *
    * If the tensor is of size zero, the data pointer should be set to `NULL`.
    */
   void* data;
   /*! \brief Device on which the tensor lives. */
   DLDevice device;
   /*! \brief Number of dimensions. */
   int32_t ndim;
   /*! \brief Data type of the elements behind the pointer. */
   DLDataType dtype;
   /*! \brief Shape of the tensor. */
   int64_t* shape;
   /*!
    * \brief Strides of the tensor, in number of elements (not bytes).
    *
    * Can be NULL, indicating that the tensor is compact and row-majored.
    */
   int64_t* strides;
   /*! \brief Offset in bytes from the data pointer to the beginning of the data. */
   uint64_t byte_offset;
 } DLTensor;

 /*!
  * \brief C Tensor object, manage memory of DLTensor. This data structure is
  *  intended to facilitate the borrowing of DLTensor by another framework. It is
  *  not meant to transfer the tensor. When the borrowing framework doesn't need
  *  the tensor, it should call the deleter to notify the host that the resource
  *  is no longer needed.
  *
  * \note This data structure is used as Legacy DLManagedTensor
  *       in DLPack exchange and is deprecated after DLPack v0.8
  *       Use DLManagedTensorVersioned instead.
  *       This data structure may get renamed or deleted in future versions.
  *
  * \sa DLManagedTensorVersioned
  */
 typedef struct DLManagedTensor {
   /*! \brief The DLTensor whose memory is being managed. */
   DLTensor dl_tensor;
   /*!
    * \brief Context of the original host framework in which this
    * DLManagedTensor is used. It can also be NULL.
    */
   void* manager_ctx;
   /*!
    * \brief Destructor.
    *
    * Should be called to destruct the manager_ctx which backs the
    * DLManagedTensor. It can be NULL if there is no way for the caller to
    * provide a reasonable destructor. The destructor deletes the argument
    * self as well.
    */
   void (*deleter)(struct DLManagedTensor* self);
 } DLManagedTensor;

 // Bit masks used in the `flags` field of DLManagedTensorVersioned.
 //
 // NOTE(review): the masks are `unsigned long` constants while `flags` is
 // uint64_t. Where `unsigned long` is 32 bits wide, `~MASK` would not cover
 // the upper half of `flags`; widen to uint64_t before inverting.

 /*! \brief bit mask to indicate that the tensor is read only. */
 #define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)

 /*!
  * \brief bit mask to indicate that the tensor is a copy made by the producer.
  *
  * If set, the tensor is considered solely owned throughout its lifetime by the
  * consumer, until the producer-provided deleter is invoked.
  */
 #define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)

 /*!
  * \brief A versioned and managed C Tensor object, manage memory of DLTensor.
  *
  * This data structure is intended to facilitate the borrowing of DLTensor by
  * another framework. It is not meant to transfer the tensor. When the borrowing
  * framework doesn't need the tensor, it should call the deleter to notify the
  * host that the resource is no longer needed.
  *
  * \note This is the current standard DLPack exchange data structure.
  */
 typedef struct DLManagedTensorVersioned {
   /*!
    * \brief The API and ABI version of the current managed Tensor
    */
   DLPackVersion version;
   /*!
    * \brief the context of the original host framework.
    *
    * Stores the context in which this DLManagedTensorVersioned is used in the
    * framework. It can also be NULL.
    */
   void* manager_ctx;
   /*!
    * \brief Destructor.
    *
    * This should be called to destruct manager_ctx which holds the
    * DLManagedTensorVersioned. It can be NULL if there is no way for the caller
    * to provide a reasonable destructor. The destructor deletes the argument
    * self as well.
    */
   void (*deleter)(struct DLManagedTensorVersioned* self);
   /*!
    * \brief Additional bitmask flags information about the tensor.
    *
    * By default the flags should be set to 0.
    *
    * \note Future ABI changes should keep everything until this field
    *       stable, to ensure that deleter can be correctly called.
    *
    * \sa DLPACK_FLAG_BITMASK_READ_ONLY
    * \sa DLPACK_FLAG_BITMASK_IS_COPIED
    */
   uint64_t flags;
   /*! \brief DLTensor which is being memory managed */
   DLTensor dl_tensor;
 } DLManagedTensorVersioned;  // typedef'd like DLManagedTensor so C code may omit `struct`

 #ifdef __cplusplus
 }  // DLPACK_EXTERN_C
 #endif
 #endif  // DLPACK_DLPACK_H_
	// Copied from https://github.com/dmlc/dlpack/tree/main/include/dlpack
	// at commit bbd2f4d32427e548797929af08cfe2a9cbb3cf12.
	// See Apache License: https://github.com/dmlc/dlpack/blob/main/LICENSE

	/*!
	* Copyright (c) 2017 by Contributors
	* \file dlpack.h
	* \brief The common header of DLPack.
	*/
	#ifndef DLPACK_DLPACK_H_
	#define DLPACK_DLPACK_H_

	/**
	* \brief Compatibility with C++
	*
	* DLPACK_EXTERN_C expands to `extern "C"` when this header is compiled as
	* C++ and to nothing when it is compiled as C.
	*/
	#ifdef __cplusplus
	#define DLPACK_EXTERN_C extern "C"
	#else
	#define DLPACK_EXTERN_C
	#endif

	/*!
	 * \brief The current major version of dlpack.
	 *
	 * Consumers compare this against DLPackVersion::major; a mismatch means the
	 * ABI data layout differs from the one declared in this header.
	 */
	#define DLPACK_MAJOR_VERSION 1

	/*!
	 * \brief The current minor version of dlpack.
	 *
	 * Minor version updates add enumeration values (for example a new device
	 * type) while the ABI is kept the same.
	 */
	#define DLPACK_MINOR_VERSION 0

	/*!
	 * \brief DLPACK_DLL prefix for windows.
	 *
	 * On _WIN32 this expands to __declspec(dllexport) when DLPACK_EXPORTS is
	 * defined and to __declspec(dllimport) otherwise. On every other platform
	 * it expands to nothing.
	 */
	#ifdef _WIN32
	#ifdef DLPACK_EXPORTS
	#define DLPACK_DLL __declspec(dllexport)
	#else
	#define DLPACK_DLL __declspec(dllimport)
	#endif
	#else
	#define DLPACK_DLL
	#endif

	#include <stddef.h>
	#include <stdint.h>

	#ifdef __cplusplus
	extern "C" {
	#endif

	/*!
	* \brief The DLPack version.
	*
	* A change in major version indicates that we have changed the
	* data layout of the ABI - DLManagedTensorVersioned.
	*
	* A change in minor version indicates that we have added new
	* code, such as a new device type, but the ABI is kept the same.
	*
	* If an obtained DLPack tensor has a major version that disagrees
	* with the version number specified in this header file
	* (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
	* (and it is safe to do so). It is not safe to access any other fields
	* as the memory layout will have changed.
	*
	* In the case of a minor version mismatch, the tensor can be safely used as
	* long as the consumer knows how to interpret all fields. Minor version
	* updates indicate the addition of enumeration values.
	*/
	typedef struct {
	  /*! \brief DLPack major version. */
	  uint32_t major;
	  /*! \brief DLPack minor version. */
	  uint32_t minor;
	} DLPackVersion;

	/*!
	* \brief The device type in DLDevice.
	*/
	#ifdef __cplusplus
	typedef enum : int32_t {
	#else
	typedef enum {
	#endif
	  /*! \brief CPU device */
	  kDLCPU = 1,
	  /*! \brief CUDA GPU device */
	  kDLCUDA = 2,
	  /*!
	   * \brief Pinned CUDA CPU memory by cudaMallocHost
	   */
	  kDLCUDAHost = 3,
	  /*! \brief OpenCL devices. */
	  kDLOpenCL = 4,
	  /*! \brief Vulkan buffer for next generation graphics. */
	  kDLVulkan = 7,
	  /*! \brief Metal for Apple GPU. */
	  kDLMetal = 8,
	  /*! \brief Verilog simulator buffer */
	  kDLVPI = 9,
	  /*! \brief ROCm GPUs for AMD GPUs */
	  kDLROCM = 10,
	  /*!
	   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
	   */
	  kDLROCMHost = 11,
	  /*!
	   * \brief Reserved extension device type,
	   * used for quickly test extension device
	   * The semantics can differ depending on the implementation.
	   */
	  kDLExtDev = 12,
	  /*!
	   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
	   */
	  kDLCUDAManaged = 13,
	  /*!
	   * \brief Unified shared memory allocated on a oneAPI non-partitioned
	   * device. Call to oneAPI runtime is required to determine the device
	   * type, the USM allocation type and the sycl context it is bound to.
	   */
	  kDLOneAPI = 14,
	  /*! \brief GPU support for next generation WebGPU standard. */
	  kDLWebGPU = 15,
	  /*! \brief Qualcomm Hexagon DSP */
	  kDLHexagon = 16,
	  /*! \brief Microsoft MAIA devices */
	  kDLMAIA = 17,
	} DLDeviceType;

	/*!
	* \brief A Device for Tensor and operator.
	*/
	typedef struct {
	/! \brief The device type used in the device. /
	DLDeviceType device_type;
	/*!
	* \brief The device index.
	* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
	*/
	int32_t device_id;
	} DLDevice;

	/*!
	* \brief The type code options DLDataType.
	*/
	typedef enum {
	  /*! \brief signed integer */
	  kDLInt = 0U,
	  /*! \brief unsigned integer */
	  kDLUInt = 1U,
	  /*! \brief IEEE floating point */
	  kDLFloat = 2U,
	  /*!
	   * \brief Opaque handle type, reserved for testing purposes.
	   * Frameworks need to agree on the handle data type for the exchange to be
	   * well-defined.
	   */
	  kDLOpaqueHandle = 3U,
	  /*! \brief bfloat16 */
	  kDLBfloat = 4U,
	  /*!
	   * \brief complex number
	   * (C/C++/Python layout: compact struct per complex number)
	   */
	  kDLComplex = 5U,
	  /*! \brief boolean */
	  kDLBool = 6U,
	} DLDataTypeCode;

	/*!
	* \brief The data type the tensor can hold. The data type is assumed to follow
	* the native endian-ness. An explicit error message should be raised when
	* attempting to export an array with non-native endianness
	*
	* Examples
	* - float: type_code = 2, bits = 32, lanes = 1
	* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
	* - int8: type_code = 0, bits = 8, lanes = 1
	* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
	* - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
	* convention, the underlying storage size of bool is 8 bits)
	*/
	typedef struct {
	  /*!
	   * \brief Type code of base types.
	   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
	   * footprint, but the value should be one of DLDataTypeCode enum values.
	   */
	  uint8_t code;
	  /*!
	   * \brief Number of bits, common choices are 8, 16, 32.
	   */
	  uint8_t bits;
	  /*! \brief Number of lanes in the type, used for vector types. */
	  uint16_t lanes;
	} DLDataType;

	/*!
	* \brief Plain C Tensor object, does not manage memory.
	*/
	typedef struct {
	/*!
	* \brief The data pointer points to the allocated data. This will be CUDA
	* device pointer or cl_mem handle in OpenCL. It may be opaque on some device
	* types. This pointer is always aligned to 256 bytes as in CUDA. The
	* `byte_offset` field should be used to point to the beginning of the data.
	*
	* Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
	* TVM, perhaps others) do not adhere to this 256 byte aligment requirement
	* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
	* (after which this note will be updated); at the moment it is recommended
	* to not rely on the data pointer being correctly aligned.
	*
	* For given DLTensor, the size of memory required to store the contents of
	* data is calculated as follows:
	*
	* \code{.c}
	* static inline size_t GetDataSize(const DLTensor* t) {
	* size_t size = 1;
	* for (tvm_index_t i = 0; i < t->ndim; ++i) {
	* size *= t->shape[i];
	* }
	* size = (t->dtype.bits t->dtype.lanes + 7) / 8;
	* return size;
	* }
	* \endcode
	*
	* Note that if the tensor is of size zero, then the data pointer should be
	* set to `NULL`.
	*/
	void* data;
	/! \brief The device of the tensor /
	DLDevice device;
	/! \brief Number of dimensions /
	int32_t ndim;
	/! \brief The data type of the pointer/
	DLDataType dtype;
	/! \brief The shape of the tensor /
	int64_t* shape;
	/*!
	* \brief strides of the tensor (in number of elements, not bytes)
	* can be NULL, indicating tensor is compact and row-majored.
	*/
	int64_t* strides;
	/! \brief The offset in bytes to the beginning pointer to data /
	uint64_t byte_offset;
	} DLTensor;

	/*!
	* \brief C Tensor object, manage memory of DLTensor. This data structure is
	* intended to facilitate the borrowing of DLTensor by another framework. It is
	* not meant to transfer the tensor. When the borrowing framework doesn't need
	* the tensor, it should call the deleter to notify the host that the resource
	* is no longer needed.
	*
	* \note This data structure is used as Legacy DLManagedTensor
	* in DLPack exchange and is deprecated after DLPack v0.8
	* Use DLManagedTensorVersioned instead.
	* This data structure may get renamed or deleted in future versions.
	*
	* \sa DLManagedTensorVersioned
	*/
	typedef struct DLManagedTensor {
	/! \brief DLTensor which is being memory managed /
	DLTensor dl_tensor;
	/*! \brief the context of the original host framework of DLManagedTensor in
	* which DLManagedTensor is used in the framework. It can also be NULL.
	*/
	void* manager_ctx;
	/*!
	* \brief Destructor - this should be called
	* to destruct the manager_ctx which backs the DLManagedTensor. It can be
	* NULL if there is no way for the caller to provide a reasonable destructor.
	* The destructor deletes the argument self as well.
	*/
	void (deleter)(struct DLManagedTensor self);
	} DLManagedTensor;

	// Bit masks used in the `flags` field of DLManagedTensorVersioned.
	//
	// NOTE(review): the masks are `unsigned long` constants while `flags` is
	// uint64_t. Where `unsigned long` is 32 bits wide, `~MASK` would not cover
	// the upper half of `flags`; widen to uint64_t before inverting.

	/*! \brief bit mask to indicate that the tensor is read only. */
	#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)

	/*!
	 * \brief bit mask to indicate that the tensor is a copy made by the producer.
	 *
	 * If set, the tensor is considered solely owned throughout its lifetime by the
	 * consumer, until the producer-provided deleter is invoked.
	 */
	#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)

	/*!
	* \brief A versioned and managed C Tensor object, manage memory of DLTensor.
	*
	* This data structure is intended to facilitate the borrowing of DLTensor by
	* another framework. It is not meant to transfer the tensor. When the borrowing
	* framework doesn't need the tensor, it should call the deleter to notify the
	* host that the resource is no longer needed.
	*
	* \note This is the current standard DLPack exchange data structure.
	*/
	struct DLManagedTensorVersioned {
	/*!
	* \brief The API and ABI version of the current managed Tensor
	*/
	DLPackVersion version;
	/*!
	* \brief the context of the original host framework.
	*
	* Stores DLManagedTensorVersioned is used in the
	* framework. It can also be NULL.
	*/
	void* manager_ctx;
	/*!
	* \brief Destructor.
	*
	* This should be called to destruct manager_ctx which holds the
	* DLManagedTensorVersioned. It can be NULL if there is no way for the caller
	* to provide a reasonable destructor. The destructor deletes the argument
	* self as well.
	*/
	void (deleter)(struct DLManagedTensorVersioned self);
	/*!
	* \brief Additional bitmask flags information about the tensor.
	*
	* By default the flags should be set to 0.
	*
	* \note Future ABI changes should keep everything until this field
	* stable, to ensure that deleter can be correctly called.
	*
	* \sa DLPACK_FLAG_BITMASK_READ_ONLY
	* \sa DLPACK_FLAG_BITMASK_IS_COPIED
	*/
	uint64_t flags;
	/! \brief DLTensor which is being memory managed /
	DLTensor dl_tensor;
	};

	#ifdef __cplusplus
	} // DLPACK_EXTERN_C
	#endif
	#endif // DLPACK_DLPACK_H_