blob: 96fd002c6293aff852a4a14859a6b8aad5682655 [file] [log] [blame] [edit]
/*
* Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <utils/arith.h>
#include <utils/base64.h>
/*
* Streaming base64 CBOR encoder
*
* This implementation is intended to allow structured data to be
* streamed out via a serial connection in a manner that minimises the
* number of actual bytes that must be written to the output.
*
* Data is streamed to an output as base64 encoded CBOR which can then
* be extracted from a serial log and decoded offline.
*/
/*
 * Encoder state: wraps the base64 streamer that output is written to.
 *
 * Note that the functions declared in this header take the base64_t
 * streamer directly rather than a cbor64_t.
 */
typedef struct {
    base64_t streamer;
} cbor64_t;
/*
 * Major types of CBOR items.
 *
 * Floats, simple values and the break marker all share major type 7.
 */
typedef enum {
    CBOR64_MT_UNSIGNED_INT = 0,
    CBOR64_MT_NEGATIVE_INT = 1,
    CBOR64_MT_BYTE_STRING  = 2,
    CBOR64_MT_UTF8_STRING  = 3,
    CBOR64_MT_ARRAY        = 4,
    CBOR64_MT_MAP          = 5,
    CBOR64_MT_TAG          = 6,
    CBOR64_MT_FLOAT        = 7,
    CBOR64_MT_SIMPLE       = CBOR64_MT_FLOAT,
    CBOR64_MT_BREAK        = CBOR64_MT_FLOAT,
} cbor64_mt_t;
/*
 * Additional information identifiers.
 *
 * Several identifiers share a value; which one applies depends on the
 * kind of item being described.
 */
typedef enum {
    /* Values strictly below this are integer literals */
    CBOR64_AI_INT_LITERAL_MAX   = 24,

    /* Size of the numeric value */
    CBOR64_AI_UINT8_T           = 24,
    CBOR64_AI_UINT16_T          = 25,
    CBOR64_AI_UINT32_T          = 26,
    CBOR64_AI_UINT64_T          = 27,

    /* The simple value is given in the next bytes */
    CBOR64_AI_SIMPLE_BYTE       = 24,

    /* Size of the float */
    CBOR64_AI_FLOAT16_T         = 25, /* IEEE 754 Half-precision */
    CBOR64_AI_FLOAT32_T         = 26, /* IEEE 754 Single-precision */
    CBOR64_AI_FLOAT64_T         = 27, /* IEEE 754 Double-precision */

    /* Length specifier for arrays/maps of unknown length */
    CBOR64_AI_INDEFINITE_LENGTH = 31,
} cbor64_ai_t;
/* Simple values that can be passed to cbor64_send_simple() */
typedef enum {
    CBOR64_SIMPLE_FALSE     = 20, /* Boolean false */
    CBOR64_SIMPLE_TRUE      = 21, /* Boolean true */
    CBOR64_SIMPLE_NULL      = 22, /* Null */
    CBOR64_SIMPLE_UNDEFINED = 23, /* Undefined */
} cbor64_simple_t;
/*
 * Tags
 *
 * A tag is sent with cbor64_tag() and describes the item that follows
 * it in the stream.
 */
typedef enum {
    /* Semantic descriptors */

    /* Date & time (encoded as UTF-8 string) */
    CBOR64_TAG_DATETIME_UTF8 = 0,
    /* Date & time encoded relative to an epoch */
    CBOR64_TAG_DATETIME_EPOCH = 1,
    /* Big integers (encoded as bytes) */
    CBOR64_TAG_POSITIVE_BIGNUM = 2,
    CBOR64_TAG_NEGATIVE_BIGNUM = 3,
    /* Decimal fraction (encoded as array of 2 integers (base 10 exponent, mantissa)) */
    CBOR64_TAG_DECIMAL_FRACTION = 4,
    /*
     * Big float (encoded as array of 2 integers (base 2 exponent, mantissa)).
     * Tag 5 is the bigfloat tag; tag 4 is the decimal fraction above.
     */
    CBOR64_TAG_BIG_FLOAT = 5,

    /* Encoding hints */

    /* Encode byte string children as base64url */
    CBOR64_TAG_ENCODE_BASE64URL = 21,
    /* Encode byte string children as base64 */
    CBOR64_TAG_ENCODE_BASE64 = 22,
    /* Encode byte string children as base16 */
    CBOR64_TAG_ENCODE_BASE16 = 23,
    /* Byte string encodes CBOR item */
    CBOR64_TAG_ENCODE_CBOR = 24,

    /* UTF-8 String descriptors */

    /* String is a URI */
    CBOR64_TAG_UTF8_URI = 32,
    /* String is a base64url */
    CBOR64_TAG_UTF8_BASE64URL = 33,
    /* String is a base64 */
    CBOR64_TAG_UTF8_BASE64 = 34,
    /* String is a PCRE/ECMA262 regular expression */
    CBOR64_TAG_UTF8_RE = 35,
    /* String MIME message */
    CBOR64_TAG_UTF8_MIME = 36,

    /* Shared values */

    /* A value that may later be referenced */
    CBOR64_TAG_SHAREABLE = 28,
    /* A reference to a previously shared value */
    CBOR64_TAG_SHARED_VALUE = 29,

    /* String references */

    /* A reference to a previously tagged string */
    CBOR64_TAG_STRING_REF = 25,
    /* A domain containing string references */
    CBOR64_TAG_STRING_REF_DOMAIN = 256,

    /* Self-described CBOR (magic bytes) */
    CBOR64_TAG_SELF_DESCRIBED = 55799,
} cbor64_tag_t;
/*
* Inline implementation
* =====================
*/
/*
 * Generate the initial byte indicating the type of the following data.
 *
 * 'type' is the major type of the item. 'data' is presumably the
 * additional information carried alongside it (this header passes
 * CBOR64_AI_INDEFINITE_LENGTH when starting arrays and maps of unknown
 * length) -- confirm against the implementation.
 */
int cbor64_initial_byte(base64_t *streamer, cbor64_mt_t type, uint8_t data);
/*
 * Send a break byte to terminate indefinite-length item.
 *
 * Used by every '*_end' function in this header.
 */
int cbor64_send_break(base64_t *streamer);
/*
 * This sends a numeric item to the streamer using big-endian encoding.
 *
 * 'number' is the numeric value of the item, e.g. the integer itself, a
 * tag, or the length of an array or map.
 */
int cbor64_send_item(base64_t *streamer, cbor64_mt_t type, uint64_t number);
/*
 * Send a typed array of bytes (UTF8 or bytes).
 *
 * 'buffer' holds 'length' bytes; 'type' selects between
 * CBOR64_MT_BYTE_STRING and CBOR64_MT_UTF8_STRING.
 */
int cbor64_send_typed_bytes(base64_t *streamer, cbor64_mt_t type, unsigned char *buffer, size_t length);
/* Send a simple value in one or two bytes (see cbor64_simple_t) */
int cbor64_send_simple(base64_t *streamer, cbor64_simple_t value);
/*
* External API
* ============
*/
/*
 * Emit a tag that annotates the item sent immediately after it.
 *
 * The tag is itself a single item. It either attaches a particular
 * semantic meaning to the next item in the stream or indicates how
 * that item is to be encoded when translated to JSON (see
 * cbor64_tag_t).
 *
 * Returns the result of cbor64_send_item().
 */
static inline int cbor64_tag(base64_t *streamer, cbor64_tag_t tag)
{
    /* The tag is the numeric value of a CBOR64_MT_TAG item */
    const int result = cbor64_send_item(streamer, CBOR64_MT_TAG, tag);

    return result;
}
/*
* Simple types
* ------------
*/
/*
 * Send a boolean value.
 *
 * Any non-zero 'boolean' is sent as CBOR64_SIMPLE_TRUE, zero is sent
 * as CBOR64_SIMPLE_FALSE. Returns the result of cbor64_send_simple().
 */
static inline int cbor64_bool(base64_t *streamer, int boolean)
{
    const uint8_t value = boolean ? CBOR64_SIMPLE_TRUE : CBOR64_SIMPLE_FALSE;

    return cbor64_send_simple(streamer, value);
}
/*
 * Send the simple value 'null'.
 *
 * Returns the result of cbor64_send_simple().
 */
static inline int cbor64_null(base64_t *streamer)
{
    const int result = cbor64_send_simple(streamer, CBOR64_SIMPLE_NULL);

    return result;
}
/*
 * Send the simple value 'undefined'.
 *
 * Returns the result of cbor64_send_simple().
 */
static inline int cbor64_undefined(base64_t *streamer)
{
    const int result = cbor64_send_simple(streamer, CBOR64_SIMPLE_UNDEFINED);

    return result;
}
/*
* Integer types
* -------------
*/
/*
 * Send an unsigned integer value.
 *
 * 'number' is sent as a CBOR64_MT_UNSIGNED_INT item. Returns the result
 * of cbor64_send_item().
 */
static inline int cbor64_uint(base64_t *streamer, uint64_t number)
{
    const int result = cbor64_send_item(streamer, CBOR64_MT_UNSIGNED_INT, number);

    return result;
}
/*
 * Send a signed integer value.
 *
 * Non-negative values are sent as CBOR64_MT_UNSIGNED_INT items. A
 * negative value n is sent as a CBOR64_MT_NEGATIVE_INT item carrying
 * the value (-1 - n). Returns the result of cbor64_send_item().
 */
static inline int cbor64_int(base64_t *streamer, int64_t number)
{
    if (number >= 0) {
        return cbor64_send_item(streamer, CBOR64_MT_UNSIGNED_INT, (uint64_t) number);
    }

    /*
     * -(n + 1) equals (-1 - n); for any negative n it lies in the range
     * 0..INT64_MAX, so neither step can overflow.
     */
    const int64_t encoded = -(number + 1);

    return cbor64_send_item(streamer, CBOR64_MT_NEGATIVE_INT, (uint64_t) encoded);
}
/*
* IEEE 754 Float types
* --------------------
*/
/*
 * Send a single-precision float value.
 *
 * NOTE(review): presumably sent as a CBOR64_MT_FLOAT item using
 * CBOR64_AI_FLOAT32_T -- confirm against the implementation.
 */
int cbor64_float(base64_t *streamer, float number);
/*
 * Send a double-precision float value.
 *
 * NOTE(review): presumably sent as a CBOR64_MT_FLOAT item using
 * CBOR64_AI_FLOAT64_T -- confirm against the implementation.
 */
int cbor64_double(base64_t *streamer, double number);
/*
* Byte arrays
* -----------
*
* The following functions describe 3 kinds of byte array:
* - Raw bytes (bytes)
* - C strings that are not guaranteed to be UTF8 (string)
* - UTF-8 C strings (utf8)
*
* Each has a function that will stream a single array along with its
* size which can be used directly. Additionally, a series of 'chunks'
* can be sent without the need to know the number of chunks. A series
* of chunks must start with a call to 'cbor64_<kind>_chunks_start' and
* finish with a call to 'cbor64_<kind>_chunks_end' with only calls to
* the corresponding 'cbor64_<kind>' in-between.
*
* For example:
*
* cbor64_utf8_chunks_start(streamer);
* cbor64_utf8(streamer, "Hello,");
* cbor64_utf8(streamer, "world!");
* cbor64_utf8_chunks_end(streamer);
*/
/*
 * Send an array of raw bytes.
 *
 * The 'length' bytes at 'buffer' are sent as a CBOR64_MT_BYTE_STRING.
 * Returns the result of cbor64_send_typed_bytes().
 */
static inline int cbor64_bytes(base64_t *streamer, unsigned char *buffer, size_t length)
{
    const int result = cbor64_send_typed_bytes(streamer, CBOR64_MT_BYTE_STRING, buffer, length);

    return result;
}
/*
 * Start chunked bytes.
 *
 * Opens a byte string of unknown length. Follow it with calls to
 * cbor64_bytes() and finish with cbor64_byte_chunks_end().
 *
 * The indefinite-length marker is written with cbor64_initial_byte(),
 * as cbor64_array_start() and cbor64_map_start() do. cbor64_send_item()
 * is documented as sending a numeric item, so handing it
 * CBOR64_AI_INDEFINITE_LENGTH (31) would presumably encode a length of
 * 31 rather than the marker.
 *
 * Returns the result of cbor64_initial_byte().
 */
static inline int cbor64_byte_chunks_start(base64_t *streamer)
{
    return cbor64_initial_byte(streamer, CBOR64_MT_BYTE_STRING, CBOR64_AI_INDEFINITE_LENGTH);
}
/*
 * End chunked bytes.
 *
 * Sends the break that terminates a series started with
 * cbor64_byte_chunks_start(). Returns the result of cbor64_send_break().
 */
static inline int cbor64_byte_chunks_end(base64_t *streamer)
{
    const int result = cbor64_send_break(streamer);

    return result;
}
/*
 * Send a C string that is not guaranteed to be UTF-8.
 *
 * 'text' must be NUL-terminated. Its strlen(text) bytes (the terminator
 * is not included) are sent as raw bytes via cbor64_bytes(), whose
 * result is returned.
 */
static inline int cbor64_string(base64_t *streamer, char *text)
{
    unsigned char *raw = (unsigned char *) text;
    const size_t length = strlen(text);

    return cbor64_bytes(streamer, raw, length);
}
/*
 * Start chunked string.
 *
 * Opens a (non-UTF-8) string of unknown length. Follow it with calls to
 * cbor64_string() and finish with cbor64_string_chunks_end().
 *
 * The indefinite-length marker is written with cbor64_initial_byte(),
 * as cbor64_array_start() and cbor64_map_start() do. cbor64_send_item()
 * is documented as sending a numeric item, so handing it
 * CBOR64_AI_INDEFINITE_LENGTH (31) would presumably encode a length of
 * 31 rather than the marker.
 *
 * Returns the result of cbor64_initial_byte().
 */
static inline int cbor64_string_chunks_start(base64_t *streamer)
{
    return cbor64_initial_byte(streamer, CBOR64_MT_BYTE_STRING, CBOR64_AI_INDEFINITE_LENGTH);
}
/*
 * End chunked string.
 *
 * Sends the break that terminates a series started with
 * cbor64_string_chunks_start(). Returns the result of
 * cbor64_send_break().
 */
static inline int cbor64_string_chunks_end(base64_t *streamer)
{
    const int result = cbor64_send_break(streamer);

    return result;
}
/*
 * Send a UTF-8 C string.
 *
 * 'text' must be NUL-terminated. Its strlen(text) bytes (the terminator
 * is not included) are sent as a CBOR64_MT_UTF8_STRING. Returns the
 * result of cbor64_send_typed_bytes().
 */
static inline int cbor64_utf8(base64_t *streamer, char *text)
{
    unsigned char *raw = (unsigned char *) text;
    const size_t length = strlen(text);

    return cbor64_send_typed_bytes(streamer, CBOR64_MT_UTF8_STRING, raw, length);
}
/*
 * Start chunked UTF-8 string.
 *
 * Opens a UTF-8 string of unknown length. Follow it with calls to
 * cbor64_utf8() and finish with cbor64_utf8_chunks_end().
 *
 * The indefinite-length marker is written with cbor64_initial_byte(),
 * as cbor64_array_start() and cbor64_map_start() do. cbor64_send_item()
 * is documented as sending a numeric item, so handing it
 * CBOR64_AI_INDEFINITE_LENGTH (31) would presumably encode a length of
 * 31 rather than the marker.
 *
 * Returns the result of cbor64_initial_byte().
 */
static inline int cbor64_utf8_chunks_start(base64_t *streamer)
{
    return cbor64_initial_byte(streamer, CBOR64_MT_UTF8_STRING, CBOR64_AI_INDEFINITE_LENGTH);
}
/*
 * End chunked UTF-8 string.
 *
 * Sends the break that terminates a series started with
 * cbor64_utf8_chunks_start(). Returns the result of cbor64_send_break().
 */
static inline int cbor64_utf8_chunks_end(base64_t *streamer)
{
    const int result = cbor64_send_break(streamer);

    return result;
}
/*
* Arrays
* ------
*
* Arrays are a series of items. An array of known length need only
* start with a call to 'cbor64_array_length'.
*
* cbor64_array_length(streamer, 2);
* cbor64_uint(streamer, 12);
* cbor64_uint(streamer, 28);
*
* If the length is unknown, the array can be started with
* 'cbor64_array_start' and completed with a call to 'cbor64_array_end'.
*
* cbor64_array_start(streamer);
* cbor64_uint(streamer, 15);
* cbor64_uint(streamer, 10538);
* cbor64_array_end(streamer);
*/
/*
 * Start an array of unknown length.
 *
 * Every item sent afterwards belongs to the array until it is closed
 * with cbor64_array_end(). Returns the result of cbor64_initial_byte().
 */
static inline int cbor64_array_start(base64_t *streamer)
{
    const int result = cbor64_initial_byte(streamer, CBOR64_MT_ARRAY, CBOR64_AI_INDEFINITE_LENGTH);

    return result;
}
/*
 * End an array of unknown length.
 *
 * Sends the break that terminates an array opened with
 * cbor64_array_start(). Returns the result of cbor64_send_break().
 */
static inline int cbor64_array_end(base64_t *streamer)
{
    const int result = cbor64_send_break(streamer);

    return result;
}
/*
 * Start an array of known length.
 *
 * The caller must follow this with exactly 'length' items; no end call
 * is needed. Returns the result of cbor64_send_item().
 */
static inline int cbor64_array_length(base64_t *streamer, uint64_t length)
{
    const int result = cbor64_send_item(streamer, CBOR64_MT_ARRAY, length);

    return result;
}
/*
* Maps
* ----
*
* Maps are a series of key-value pairs. The keys may be of any type.
*
* A map of known length need only start with a call to
* 'cbor64_map_length'.
*
* cbor64_map_length(streamer, 2);
* cbor64_utf8(streamer, "x");
* cbor64_uint(streamer, 48);
* cbor64_utf8(streamer, "y");
* cbor64_uint(streamer, 97);
*
* If the length is unknown, the map can be started with
* 'cbor64_map_start' and completed with a call to 'cbor64_map_end'.
*
* cbor64_map_start(streamer);
* cbor64_utf8(streamer, "x");
* cbor64_uint(streamer, 48);
* cbor64_utf8(streamer, "y");
* cbor64_uint(streamer, 97);
* cbor64_map_end(streamer);
*/
/*
 * Start a map of unknown length.
 *
 * Key-value pairs sent afterwards belong to the map until it is closed
 * with cbor64_map_end(). Returns the result of cbor64_initial_byte().
 */
static inline int cbor64_map_start(base64_t *streamer)
{
    const int result = cbor64_initial_byte(streamer, CBOR64_MT_MAP, CBOR64_AI_INDEFINITE_LENGTH);

    return result;
}
/*
 * End a map of unknown length.
 *
 * Sends the break that terminates a map opened with cbor64_map_start().
 * Returns the result of cbor64_send_break().
 */
static inline int cbor64_map_end(base64_t *streamer)
{
    const int result = cbor64_send_break(streamer);

    return result;
}
/*
 * Start a map of known length.
 *
 * The caller must follow this with 'length' key-value pairs; no end
 * call is needed. Returns the result of cbor64_send_item().
 */
static inline int cbor64_map_length(base64_t *streamer, uint64_t length)
{
    const int result = cbor64_send_item(streamer, CBOR64_MT_MAP, length);

    return result;
}
/*
* String reference domains
* ========================
*
* String reference domains allow reduced encoding of strings by only
* emitting each encoded string once and then using tagged numeric
* references to previous occurrences of strings.
*
* The current implementation is suboptimal but avoids allocation by
* using a static allocation of the strings used.
*
* Within a string reference domain, all strings must be emitted using
* 'cbor64_string_ref' or 'cbor64_utf8_ref' emitters. To emit a sized
* byte array or data containing strings not in the domain, you can
* create a new null domain that contains no references.
*
* Using shared values
* -------------------
*
* If the tooling used does not support string reference domains but
* does support shared values, this can be used to implement similar
* semantics, however only one domain using shared values can exist in a
* dataset.
*/
/*
 * State of one string reference (or shared value) domain.
 *
 * Tracks the strings which have already been emitted and their index.
 */
typedef struct {
    /* NULL-terminated array of the strings belonging to the domain */
    char **strings;
    /* Use shared values rather than string references */
    bool shared_values;
    /*
     * Reset to 0 when the domain is created.
     * NOTE(review): presumably the number of strings emitted so far --
     * confirm against the implementation.
     */
    uint64_t emitted;
} cbor64_domain_t;
/*
 * Start a new domain with no inner string references.
 *
 * Only the CBOR64_TAG_STRING_REF_DOMAIN tag is sent; no cbor64_domain_t
 * is involved. Returns the result of cbor64_tag().
 */
static inline int cbor64_null_domain(base64_t *streamer)
{
    const int result = cbor64_tag(streamer, CBOR64_TAG_STRING_REF_DOMAIN);

    return result;
}
/*
 * Create a new string reference domain.
 *
 * Initialises 'domain' to track 'strings' with nothing emitted yet and
 * then sends the CBOR64_TAG_STRING_REF_DOMAIN tag.
 *
 * 'strings' must be terminated with a NULL entry and must not be used
 * again within this domain in a nested fashion.
 *
 * Returns the result of cbor64_tag().
 */
static inline int cbor64_string_ref_domain(base64_t *streamer, char **strings, cbor64_domain_t *domain)
{
    /* String references, not shared values, are used for this domain */
    domain->shared_values = false;
    domain->emitted = 0;
    domain->strings = strings;

    return cbor64_tag(streamer, CBOR64_TAG_STRING_REF_DOMAIN);
}
/*
 * Create a new shared value domain.
 *
 * Initialises 'domain' to track 'strings' with nothing emitted yet.
 * Unlike cbor64_string_ref_domain(), nothing is written to a streamer.
 *
 * There must be no more than one shared value domain in an output.
 *
 * 'strings' must be terminated with a NULL entry and must not be used
 * again within this domain in a nested fashion.
 */
static inline void cbor64_shared_value_domain(char **strings, cbor64_domain_t *domain)
{
    /* Shared values are used in place of string references */
    domain->shared_values = true;
    domain->emitted = 0;
    domain->strings = strings;
}
/*
 * Emit a string reference
 *
 * 'domain' is a domain set up with cbor64_string_ref_domain() or
 * cbor64_shared_value_domain(). Within a domain every string must be
 * emitted through this function or cbor64_utf8_ref().
 *
 * NOTE(review): 'string' is presumably expected to be one of the
 * entries of the domain's string array -- confirm against the
 * implementation.
 */
int cbor64_string_ref(base64_t *streamer, cbor64_domain_t *domain, char *string);
/*
 * Emit a utf8 reference
 *
 * 'domain' is a domain set up with cbor64_string_ref_domain() or
 * cbor64_shared_value_domain(). Within a domain every string must be
 * emitted through this function or cbor64_string_ref().
 *
 * NOTE(review): 'string' is presumably expected to be one of the
 * entries of the domain's string array -- confirm against the
 * implementation.
 */
int cbor64_utf8_ref(base64_t *streamer, cbor64_domain_t *domain, char *string);