// Copied from https://github.com/dmlc/dlpack/tree/main/include/dlpack
// at commit bbd2f4d32427e548797929af08cfe2a9cbb3cf12.
// See Apache License: https://github.com/dmlc/dlpack/blob/main/LICENSE
/*!
* Copyright (c) 2017 by Contributors
* \file dlpack.h
* \brief The common header of DLPack.
*/
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_
/**
* \brief Compatibility with C++
*/
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif
/*! \brief The current major version of dlpack */
#define DLPACK_MAJOR_VERSION 1
/*! \brief The current minor version of dlpack */
#define DLPACK_MINOR_VERSION 0
/*! \brief DLPACK_DLL prefix for Windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \brief The DLPack version.
*
* A change in major version indicates that we have changed the
* data layout of the ABI - DLManagedTensorVersioned.
*
* A change in minor version indicates that we have added new
* code, such as a new device type, but the ABI is kept the same.
*
* If an obtained DLPack tensor has a major version that disagrees
* with the version number specified in this header file
* (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
* (and it is safe to do so). It is not safe to access any other fields
* as the memory layout will have changed.
*
* In the case of a minor version mismatch, the tensor can be safely used as
* long as the consumer knows how to interpret all fields. Minor version
* updates indicate the addition of enumeration values.
*/
typedef struct {
/*! \brief DLPack major version. */
uint32_t major;
/*! \brief DLPack minor version. */
uint32_t minor;
} DLPackVersion;
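/*
 * A minimal consumer-side sketch (not part of the DLPack API) of the version
 * handshake described above. The helper name and the incoming `tensor`
 * pointer are illustrative assumptions.
 *
 * \code{.c}
 * // Returns 1 if the tensor may be used, 0 if it was rejected and deleted.
 * static int ConsumeIfCompatible(struct DLManagedTensorVersioned* tensor) {
 *   if (tensor->version.major != DLPACK_MAJOR_VERSION) {
 *     // Incompatible ABI: only the deleter may be called safely.
 *     if (tensor->deleter != NULL) {
 *       tensor->deleter(tensor);
 *     }
 *     return 0;
 *   }
 *   // A newer minor version is fine as long as all fields are understood.
 *   return 1;
 * }
 * \endcode
 */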
/*!
* \brief The device type in DLDevice.
*/
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
/*! \brief CPU device */
kDLCPU = 1,
/*! \brief CUDA GPU device */
kDLCUDA = 2,
/*!
   * \brief Pinned CUDA CPU memory allocated by cudaMallocHost
*/
kDLCUDAHost = 3,
/*! \brief OpenCL devices. */
kDLOpenCL = 4,
/*! \brief Vulkan buffer for next generation graphics. */
kDLVulkan = 7,
/*! \brief Metal for Apple GPU. */
kDLMetal = 8,
/*! \brief Verilog simulator buffer */
kDLVPI = 9,
  /*! \brief ROCm device for AMD GPUs */
kDLROCM = 10,
/*!
* \brief Pinned ROCm CPU memory allocated by hipMallocHost
*/
kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used to quickly test extension devices.
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
/*!
* \brief CUDA managed/unified memory allocated by cudaMallocManaged
*/
kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. A call to the oneAPI runtime is required to determine the device
   * type, the USM allocation type, and the sycl context it is bound to.
   */
  kDLOneAPI = 14,
/*! \brief GPU support for next generation WebGPU standard. */
kDLWebGPU = 15,
/*! \brief Qualcomm Hexagon DSP */
kDLHexagon = 16,
/*! \brief Microsoft MAIA devices */
kDLMAIA = 17,
} DLDeviceType;
/*!
* \brief A Device for Tensor and operator.
*/
typedef struct {
/*! \brief The device type used in the device. */
DLDeviceType device_type;
/*!
* \brief The device index.
* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
*/
int32_t device_id;
} DLDevice;
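/*
 * Illustrative sketch (not part of the DLPack API): describing devices with
 * DLDevice initializers (fields are device_type, device_id).
 *
 * \code{.c}
 * DLDevice cuda_dev = {kDLCUDA, 1};  // second CUDA GPU (device index 1)
 * DLDevice cpu_dev  = {kDLCPU, 0};   // vanilla CPU memory uses device_id 0
 * \endcode
 */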
/*!
 * \brief The type code options of DLDataType.
 */
typedef enum {
/*! \brief signed integer */
kDLInt = 0U,
/*! \brief unsigned integer */
kDLUInt = 1U,
/*! \brief IEEE floating point */
kDLFloat = 2U,
/*!
* \brief Opaque handle type, reserved for testing purposes.
* Frameworks need to agree on the handle data type for the exchange to be
* well-defined.
*/
kDLOpaqueHandle = 3U,
/*! \brief bfloat16 */
kDLBfloat = 4U,
/*!
* \brief complex number
* (C/C++/Python layout: compact struct per complex number)
*/
kDLComplex = 5U,
/*! \brief boolean */
kDLBool = 6U,
} DLDataTypeCode;
/*!
 * \brief The data type the tensor can hold. The data type is assumed to follow
 * the native endianness. An explicit error message should be raised when
 * attempting to export an array with non-native endianness.
 *
 * Examples
 *  - float: type_code = 2, bits = 32, lanes = 1
 *  - float4 (vectorized 4 floats): type_code = 2, bits = 32, lanes = 4
 *  - int8: type_code = 0, bits = 8, lanes = 1
 *  - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *  - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
 *    convention, the underlying storage size of bool is 8 bits)
 */
typedef struct {
/*!
* \brief Type code of base types.
* We keep it uint8_t instead of DLDataTypeCode for minimal memory
* footprint, but the value should be one of DLDataTypeCode enum values.
   */
uint8_t code;
/*!
* \brief Number of bits, common choices are 8, 16, 32.
*/
uint8_t bits;
/*! \brief Number of lanes in the type, used for vector types. */
uint16_t lanes;
} DLDataType;
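/*
 * Illustrative sketch (not part of the DLPack API): the dtype examples above
 * written as DLDataType initializers (fields are code, bits, lanes).
 *
 * \code{.c}
 * DLDataType f32   = {kDLFloat, 32, 1};    // float
 * DLDataType f32x4 = {kDLFloat, 32, 4};    // float4 (vectorized 4 floats)
 * DLDataType i8    = {kDLInt, 8, 1};       // int8
 * DLDataType c64   = {kDLComplex, 64, 1};  // std::complex<float>
 * DLDataType b8    = {kDLBool, 8, 1};      // bool (stored in 8 bits)
 * \endcode
 */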
/*!
* \brief Plain C Tensor object, does not manage memory.
*/
typedef struct {
/*!
* \brief The data pointer points to the allocated data. This will be CUDA
* device pointer or cl_mem handle in OpenCL. It may be opaque on some device
* types. This pointer is always aligned to 256 bytes as in CUDA. The
* `byte_offset` field should be used to point to the beginning of the data.
*
   * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
   * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
   * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
   * (after which this note will be updated); at the moment it is recommended
   * to not rely on the data pointer being correctly aligned.
   *
   * For a given DLTensor, the size of memory required to store the contents of
* data is calculated as follows:
*
* \code{.c}
   * static inline size_t GetDataSize(const DLTensor* t) {
   *   size_t size = 1;
   *   for (int32_t i = 0; i < t->ndim; ++i) {
   *     size *= t->shape[i];
   *   }
   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *   return size;
   * }
* \endcode
*
* Note that if the tensor is of size zero, then the data pointer should be
* set to `NULL`.
*/
void* data;
/*! \brief The device of the tensor */
DLDevice device;
/*! \brief Number of dimensions */
int32_t ndim;
  /*! \brief The data type of the tensor */
DLDataType dtype;
/*! \brief The shape of the tensor */
int64_t* shape;
/*!
   * \brief Strides of the tensor (in number of elements, not bytes);
   * can be NULL, indicating the tensor is compact and row-major.
*/
int64_t* strides;
  /*! \brief The offset in bytes to the beginning of the tensor data, relative to `data`. */
uint64_t byte_offset;
} DLTensor;
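/*
 * A minimal sketch (not part of the DLPack API) of how a consumer might
 * compute the address of one element of a DLTensor, honouring `strides`,
 * `byte_offset`, and the NULL-strides (compact row-major) convention
 * described above. The helper name and the `idx` array (one index per
 * dimension) are illustrative assumptions; this is only meaningful on device
 * types where `data` is directly dereferenceable by the host (e.g. kDLCPU).
 *
 * \code{.c}
 * static void* ElementPtr(const DLTensor* t, const int64_t* idx) {
 *   int64_t elem = 0;
 *   if (t->strides != NULL) {
 *     for (int32_t i = 0; i < t->ndim; ++i) elem += idx[i] * t->strides[i];
 *   } else {
 *     // NULL strides: compact row-major layout.
 *     for (int32_t i = 0; i < t->ndim; ++i) elem = elem * t->shape[i] + idx[i];
 *   }
 *   size_t elem_bytes = (t->dtype.bits * t->dtype.lanes + 7) / 8;
 *   return (char*)t->data + t->byte_offset + (size_t)elem * elem_bytes;
 * }
 * \endcode
 */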
/*!
* \brief C Tensor object, manage memory of DLTensor. This data structure is
* intended to facilitate the borrowing of DLTensor by another framework. It is
* not meant to transfer the tensor. When the borrowing framework doesn't need
* the tensor, it should call the deleter to notify the host that the resource
* is no longer needed.
*
 * \note This data structure is the legacy DLManagedTensor used in DLPack
 * exchange; it is deprecated as of DLPack v0.8.
 * Use DLManagedTensorVersioned instead.
 * This data structure may get renamed or deleted in future versions.
*
* \sa DLManagedTensorVersioned
*/
typedef struct DLManagedTensor {
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
  /*! \brief The context of the original host framework in which this
   * DLManagedTensor is used. It can also be NULL.
   */
void* manager_ctx;
/*!
* \brief Destructor - this should be called
* to destruct the manager_ctx which backs the DLManagedTensor. It can be
* NULL if there is no way for the caller to provide a reasonable destructor.
* The destructor deletes the argument self as well.
*/
void (*deleter)(struct DLManagedTensor* self);
} DLManagedTensor;
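/*
 * A minimal consumer-side sketch (not part of the DLPack API) of the borrow
 * and release pattern described above for the legacy DLManagedTensor. The
 * helper name is an illustrative assumption.
 *
 * \code{.c}
 * static void UseAndRelease(DLManagedTensor* managed) {
 *   const DLTensor* t = &managed->dl_tensor;
 *   // ... read from *t while the borrow is alive ...
 *   (void)t;
 *   // Tell the producer the resource is no longer needed.
 *   if (managed->deleter != NULL) {
 *     managed->deleter(managed);
 *   }
 * }
 * \endcode
 */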
// bit masks used in the DLManagedTensorVersioned
/*! \brief bit mask to indicate that the tensor is read only. */
#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
/*!
* \brief bit mask to indicate that the tensor is a copy made by the producer.
*
* If set, the tensor is considered solely owned throughout its lifetime by the
* consumer, until the producer-provided deleter is invoked.
*/
#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
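/*
 * Illustrative sketch (not part of the DLPack API): testing the flag bits,
 * where `tensor` is assumed to be a struct DLManagedTensorVersioned pointer
 * obtained from a producer.
 *
 * \code{.c}
 * int read_only = (tensor->flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0;
 * int is_copy   = (tensor->flags & DLPACK_FLAG_BITMASK_IS_COPIED) != 0;
 * \endcode
 */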
/*!
* \brief A versioned and managed C Tensor object, manage memory of DLTensor.
*
* This data structure is intended to facilitate the borrowing of DLTensor by
* another framework. It is not meant to transfer the tensor. When the borrowing
* framework doesn't need the tensor, it should call the deleter to notify the
* host that the resource is no longer needed.
*
* \note This is the current standard DLPack exchange data structure.
*/
struct DLManagedTensorVersioned {
/*!
* \brief The API and ABI version of the current managed Tensor
*/
DLPackVersion version;
  /*!
   * \brief The context of the original host framework.
   *
   * Stores the context in which this DLManagedTensorVersioned is used in the
   * framework. It can also be NULL.
   */
void* manager_ctx;
/*!
* \brief Destructor.
*
* This should be called to destruct manager_ctx which holds the
* DLManagedTensorVersioned. It can be NULL if there is no way for the caller
* to provide a reasonable destructor. The destructor deletes the argument
* self as well.
*/
void (*deleter)(struct DLManagedTensorVersioned* self);
/*!
* \brief Additional bitmask flags information about the tensor.
*
* By default the flags should be set to 0.
*
   * \note Future ABI changes should keep all fields up to and including this
   * one stable, to ensure that the deleter can be correctly called.
*
* \sa DLPACK_FLAG_BITMASK_READ_ONLY
* \sa DLPACK_FLAG_BITMASK_IS_COPIED
*/
uint64_t flags;
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
};
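/*
 * A minimal producer-side sketch (not part of the DLPack API) of filling in a
 * DLManagedTensorVersioned for a compact row-major float32 CPU tensor. The
 * helper names are illustrative assumptions; a real producer would tie
 * `manager_ctx` and the deleter to its own allocation scheme. Assumes
 * <stdlib.h> for malloc/free.
 *
 * \code{.c}
 * static void ExampleDeleter(struct DLManagedTensorVersioned* self) {
 *   // The deleter releases whatever backs the tensor and `self` itself.
 *   free(self);
 * }
 *
 * static struct DLManagedTensorVersioned* MakeExample(void* data,
 *                                                     int64_t* shape,
 *                                                     int32_t ndim) {
 *   struct DLManagedTensorVersioned* out = malloc(sizeof(*out));
 *   if (out == NULL) return NULL;
 *   out->version.major = DLPACK_MAJOR_VERSION;
 *   out->version.minor = DLPACK_MINOR_VERSION;
 *   out->manager_ctx = NULL;
 *   out->deleter = ExampleDeleter;
 *   out->flags = 0;  // neither READ_ONLY nor IS_COPIED
 *   out->dl_tensor.data = data;
 *   out->dl_tensor.device.device_type = kDLCPU;
 *   out->dl_tensor.device.device_id = 0;
 *   out->dl_tensor.ndim = ndim;
 *   out->dl_tensor.dtype.code = kDLFloat;
 *   out->dl_tensor.dtype.bits = 32;
 *   out->dl_tensor.dtype.lanes = 1;
 *   out->dl_tensor.shape = shape;   // lifetime managed by the producer
 *   out->dl_tensor.strides = NULL;  // compact row-major
 *   out->dl_tensor.byte_offset = 0;
 *   return out;
 * }
 * \endcode
 */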
#ifdef __cplusplus
} // DLPACK_EXTERN_C
#endif
#endif // DLPACK_DLPACK_H_