pw_tokenizer: C++ support for the Base64 format

Provide functions for encoding and decoding tokenized messages in the
standard prefixed Base64 format.

Change-Id: I8d09fddd29f5e3589228e5e1947cde11149c8233
diff --git a/pw_tokenizer/BUILD b/pw_tokenizer/BUILD
index 7fe7aa1..49c2b5c 100644
--- a/pw_tokenizer/BUILD
+++ b/pw_tokenizer/BUILD
@@ -49,6 +49,22 @@
 )
 
 pw_cc_library(
+    name = "base64",
+    srcs = [
+        "base64.cc",
+    ],
+    hdrs = [
+        "public/pw_tokenizer/base64.h",
+    ],
+    includes = ["public"],
+    deps = [
+        "//pw_base64",
+        "//pw_preprocessor",
+        "//pw_span",
+    ],
+)
+
+pw_cc_library(
     name = "decoder",
     srcs = [
         "decode.cc",
@@ -109,6 +125,16 @@
 )
 
 pw_cc_test(
+    name = "base64_test",
+    srcs = [
+        "base64_test.cc",
+    ],
+    deps = [
+        ":base64",
+    ],
+)
+
+pw_cc_test(
     name = "decode_test",
     srcs = [
         "decode_test.cc",
diff --git a/pw_tokenizer/BUILD.gn b/pw_tokenizer/BUILD.gn
index 4fc79ce..897692f 100644
--- a/pw_tokenizer/BUILD.gn
+++ b/pw_tokenizer/BUILD.gn
@@ -53,6 +53,24 @@
   ]
 }
 
+source_set("base64") {
+  public_configs = [
+    "$dir_pw_build:pw_default_cpp",
+    ":default_config",
+  ]
+  public = [
+    "public/pw_tokenizer/base64.h",
+  ]
+  sources = [ "base64.cc" ] + public
+  public_deps = [
+    "$dir_pw_preprocessor",
+    "$dir_pw_span",
+  ]
+  deps = [
+    "$dir_pw_base64",
+  ]
+}
+
 source_set("decoder") {
   public_configs = [
     "$dir_pw_build:pw_default_cpp",
@@ -110,6 +128,7 @@
 pw_test_group("tests") {
   tests = [
     ":argument_types_test",
+    ":base64_test",
     ":decode_test",
     ":detokenize_test",
     ":hash_test",
@@ -134,6 +153,15 @@
   ]
 }
 
+pw_test("base64_test") {
+  sources = [
+    "base64_test.cc",
+  ]
+  deps = [
+    ":base64",
+  ]
+}
+
 pw_test("decode_test") {
   sources = [
     "decode_test.cc",
diff --git a/pw_tokenizer/CMakeLists.txt b/pw_tokenizer/CMakeLists.txt
index f9c1280..e95cb2d 100644
--- a/pw_tokenizer/CMakeLists.txt
+++ b/pw_tokenizer/CMakeLists.txt
@@ -22,6 +22,16 @@
     pw_varint
 )
 
+pw_add_module_library(pw_tokenizer.base64
+  SOURCES
+    base64.cc
+  PUBLIC_DEPS
+    pw_preprocessor
+    pw_span
+  PRIVATE_DEPS
+    pw_base64
+)
+
 pw_add_module_library(pw_tokenizer.decoder
   SOURCES
     decode.cc
@@ -63,6 +73,16 @@
     pw_tokenizer
 )
 
+pw_add_test(pw_tokenizer.base64_test
+  SOURCES
+    base64_test.cc
+  DEPS
+    pw_tokenizer.base64
+  GROUPS
+    modules
+    pw_tokenizer
+)
+
 pw_add_test(pw_tokenizer.decode_test
   SOURCES
     decode_test.cc
diff --git a/pw_tokenizer/base64.cc b/pw_tokenizer/base64.cc
new file mode 100644
index 0000000..e05390e
--- /dev/null
+++ b/pw_tokenizer/base64.cc
@@ -0,0 +1,58 @@
+// Copyright 2020 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include "pw_tokenizer/base64.h"
+
+#include "pw_base64/base64.h"
+#include "pw_span/span.h"
+
+namespace pw::tokenizer {
+
+extern "C" size_t pw_TokenizerPrefixedBase64Encode(
+    const void* binary_message,
+    size_t binary_size_bytes,
+    void* output_buffer,
+    size_t output_buffer_size_bytes) {
+  const size_t encoded_size = base64::EncodedSize(binary_size_bytes) + 1;
+
+  if (output_buffer_size_bytes < encoded_size) {
+    return 0;
+  }
+
+  char* output = static_cast<char*>(output_buffer);
+  output[0] = kBase64Prefix;
+
+  base64::Encode(
+      span(static_cast<const std::byte*>(binary_message), binary_size_bytes),
+      &output[1]);
+
+  return encoded_size;
+}
+
+extern "C" size_t pw_TokenizerPrefixedBase64Decode(const void* base64_message,
+                                                   size_t base64_size_bytes,
+                                                   void* output_buffer,
+                                                   size_t output_buffer_size) {
+  const char* base64 = static_cast<const char*>(base64_message);
+
+  if (base64_size_bytes == 0 || base64[0] != kBase64Prefix) {
+    return 0;
+  }
+
+  return base64::Decode(
+      std::string_view(&base64[1], base64_size_bytes - 1),
+      span(static_cast<std::byte*>(output_buffer), output_buffer_size));
+}
+
+}  // namespace pw::tokenizer
diff --git a/pw_tokenizer/base64_test.cc b/pw_tokenizer/base64_test.cc
new file mode 100644
index 0000000..7c2dd7d
--- /dev/null
+++ b/pw_tokenizer/base64_test.cc
@@ -0,0 +1,118 @@
+// Copyright 2020 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include "pw_tokenizer/base64.h"
+
+#include <cstring>
+#include <string_view>
+
+#include "gtest/gtest.h"
+#include "pw_span/span.h"
+
+namespace pw::tokenizer {
+namespace {
+
+class PrefixedBase64 : public ::testing::Test {
+ protected:
+  PrefixedBase64() : binary_{}, base64_{} {}
+
+  std::byte binary_[32];
+  char base64_[32];
+};
+
+const struct TestData {
+  template <size_t kSize>
+  TestData(const char (&binary)[kSize], const char* base64)
+      : binary{as_bytes(span(binary, kSize - 1))}, base64(base64) {}
+
+  span<const std::byte> binary;
+  std::string_view base64;
+} kTestData[] = {
+    {"", "$"},
+    {"\x00", "$AA=="},
+    {"\x71", "$cQ=="},
+    {"\xff", "$/w=="},
+    {"\x63\xa9", "$Y6k="},
+    {"\x69\x89\x03", "$aYkD"},
+    {"\x80\xf5\xc8\xd4", "$gPXI1A=="},
+    {"\x6e\xb8\x91\x3f\xac", "$briRP6w="},
+    {"\x1f\x88\x91\xbb\xd7\x10", "$H4iRu9cQ"},
+    {"\xac\xcf\xb2\xd5\xee\xa2\x8e", "$rM+y1e6ijg=="},
+    {"\xff\x15\x25\x7e\x7b\xc9\x7b\x60", "$/xUlfnvJe2A="},
+    {"\xd5\xab\xd9\xa6\xae\xaa\x33\x9f\x66", "$1avZpq6qM59m"},
+    {"\x6b\xfd\x95\xc5\x4a\xc7\xc2\x39\x45\xdc", "$a/2VxUrHwjlF3A=="},
+    {"\x4c\xde\xee\xb8\x68\x0d\x9c\x66\x3e\xea\x46", "$TN7uuGgNnGY+6kY="},
+};
+
+TEST_F(PrefixedBase64, Encode) {
+  for (auto& data : kTestData) {
+    EXPECT_EQ(data.base64.size(), PrefixedBase64Encode(data.binary, base64_));
+    ASSERT_EQ(data.base64, base64_);
+  }
+}
+
+TEST_F(PrefixedBase64, Encode_EmptyInput_WritesPrefix) {
+  EXPECT_EQ(1u, PrefixedBase64Encode({}, base64_));
+  EXPECT_EQ('$', base64_[0]);
+}
+
+TEST_F(PrefixedBase64, Encode_EmptyOutput_WritesNothing) {
+  EXPECT_EQ(0u, PrefixedBase64Encode(kTestData[5].binary, span(base64_, 0)));
+  EXPECT_EQ('\0', base64_[0]);
+}
+
+TEST_F(PrefixedBase64, Decode) {
+  for (auto& data : kTestData) {
+    EXPECT_EQ(data.binary.size(), PrefixedBase64Decode(data.base64, binary_));
+    ASSERT_EQ(0, std::memcmp(data.binary.data(), binary_, data.binary.size()));
+  }
+}
+
+TEST_F(PrefixedBase64, Decode_EmptyInput_WritesNothing) {
+  EXPECT_EQ(0u, PrefixedBase64Decode({}, binary_));
+  EXPECT_EQ(std::byte{0}, binary_[0]);
+}
+
+TEST_F(PrefixedBase64, Decode_OnlyPrefix_WritesNothing) {
+  EXPECT_EQ(0u, PrefixedBase64Decode("$", binary_));
+  EXPECT_EQ(std::byte{0}, binary_[0]);
+}
+
+TEST_F(PrefixedBase64, Decode_EmptyOutput_WritesNothing) {
+  EXPECT_EQ(0u, PrefixedBase64Decode(kTestData[5].base64, span(binary_, 0)));
+  EXPECT_EQ(std::byte{0}, binary_[0]);
+}
+
+TEST_F(PrefixedBase64, Decode_OutputTooSmall_WritesNothing) {
+  auto& item = kTestData[5];
+  EXPECT_EQ(
+      0u,
+      PrefixedBase64Decode(item.base64, span(binary_, item.binary.size() - 1)));
+  EXPECT_EQ(std::byte{0}, binary_[0]);
+}
+
+TEST_F(PrefixedBase64, DecodeInPlace) {
+  // TEST_F: googletest forbids mixing TEST and TEST_F in one test suite.
+  std::byte buffer[32];
+  for (auto& data : kTestData) {
+    std::memcpy(buffer, data.base64.data(), data.base64.size());
+
+    EXPECT_EQ(data.binary.size(),
+              PrefixedBase64DecodeInPlace(span(buffer, data.base64.size())));
+    ASSERT_EQ(0, std::memcmp(data.binary.data(), buffer, data.binary.size()));
+  }
+}
+
+}  // namespace
+}  // namespace pw::tokenizer
diff --git a/pw_tokenizer/public/pw_tokenizer/base64.h b/pw_tokenizer/public/pw_tokenizer/base64.h
new file mode 100644
index 0000000..45bc029
--- /dev/null
+++ b/pw_tokenizer/public/pw_tokenizer/base64.h
@@ -0,0 +1,105 @@
+// Copyright 2020 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+// This file provides functions for working with the prefixed Base64 format for
+// tokenized messages. This format is useful for transmitting tokenized messages
+// as plain text.
+//
+// The format uses a prefix character ($), followed by the Base64 version of the
+// tokenized message. For example, consider a tokenized message with token
+// 0xfeb35a42 and encoded argument 0x13. This message would be encoded as
+// follows:
+//
+//            Binary: 42 5a b3 fe 13  [5 bytes]
+//
+//   Prefixed Base64: $Qlqz/hM=       [9 bytes]
+//
+#pragma once
+
+#include <stddef.h>
+
+#include "pw_preprocessor/util.h"
+
+// This character is used to mark the start of a Base64-encoded tokenized
+// message. For consistency, it is recommended to always use $ if possible.
+//
+// If desired, any non-Base64 character may be used as a prefix.
+#define PW_TOKENIZER_BASE64_PREFIX '$'
+
+PW_EXTERN_C_START
+
+// Encodes a binary tokenized message as prefixed Base64. Returns the number of
+// characters written to output_buffer, including the prefix. Returns 0 if the
+// buffer is too small.
+//
+// Equivalent to pw::tokenizer::PrefixedBase64Encode.
+size_t pw_TokenizerPrefixedBase64Encode(const void* binary_message,
+                                        size_t binary_size_bytes,
+                                        void* output_buffer,
+                                        size_t output_buffer_size_bytes);
+// Decodes a prefixed Base64 tokenized message to binary. Returns the size of
+// the decoded binary data. The resulting data is ready to be passed to
+// pw::tokenizer::Detokenizer::Detokenize. Returns 0 if the buffer is too small,
+// the expected prefix character is missing, or the Base64 data is corrupt.
+//
+// Equivalent to pw::tokenizer::PrefixedBase64Decode.
+size_t pw_TokenizerPrefixedBase64Decode(const void* base64_message,
+                                        size_t base64_size_bytes,
+                                        void* output_buffer,
+                                        size_t output_buffer_size);
+
+PW_EXTERN_C_END
+
+#ifdef __cplusplus
+
+#include <string_view>
+
+#include "pw_span/span.h"
+
+namespace pw::tokenizer {
+
+inline constexpr char kBase64Prefix = PW_TOKENIZER_BASE64_PREFIX;
+
+// Encodes a binary tokenized message as prefixed Base64. Returns the number of
+// characters written to output_buffer, including the prefix. Returns 0 if the
+// buffer is too small.
+inline size_t PrefixedBase64Encode(span<const std::byte> binary_message,
+                                   span<char> output_buffer) {
+  return pw_TokenizerPrefixedBase64Encode(binary_message.data(),
+                                          binary_message.size(),
+                                          output_buffer.data(),
+                                          output_buffer.size());
+}
+
+// Decodes a prefixed Base64 tokenized message to binary. Returns the size of
+// the decoded binary data. The resulting data is ready to be passed to
+// pw::tokenizer::Detokenizer::Detokenize.
+inline size_t PrefixedBase64Decode(std::string_view base64_message,
+                                   span<std::byte> output_buffer) {
+  return pw_TokenizerPrefixedBase64Decode(base64_message.data(),
+                                          base64_message.size(),
+                                          output_buffer.data(),
+                                          output_buffer.size());
+}
+
+// Decodes a prefixed Base64 tokenized message to binary in place. Returns the
+// size of the decoded binary data.
+inline size_t PrefixedBase64DecodeInPlace(span<std::byte> buffer) {
+  return pw_TokenizerPrefixedBase64Decode(
+      buffer.data(), buffer.size(), buffer.data(), buffer.size());
+}
+
+}  // namespace pw::tokenizer
+
+#endif  // __cplusplus