pw_tokenizer: Replace string literals with tokens
pw_tokenizer provides macros that replace printf-style string literals
with 32-bit hashes at compile time. The string literals are removed
from the resulting binary, which can dramatically reduce binary size.
Like any printf-style string, a tokenized string can be formatted with
arguments, and the resulting binary data can be transmitted or stored.
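As a sketch of the tokenizing API (the PW_TOKENIZE_TO_BUFFER macro is
declared in tokenize.h in this commit; the function, format string, and
buffer size below are illustrative):

  #include <cstddef>
  #include <cstdint>

  #include "pw_tokenizer/tokenize.h"

  void SendBatteryLog(int percent) {
    uint8_t buffer[32];
    size_t size = sizeof(buffer);
    // Hashes "Battery level: %d" to a 32-bit token at compile time,
    // then writes the token and the encoded argument to buffer.
    PW_TOKENIZE_TO_BUFFER(buffer, &size, "Battery level: %d", percent);
    // buffer[0..size) is now ready to transmit or store.
  }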
The pw_tokenizer module is general purpose, but its most common use
case is binary logging, in which human-readable text logs are replaced
with binary tokens that are decoded off-device.
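For example, a log such as "Battery level: %d" with the argument 25
could be sent as the string's 32-bit token followed by the encoded
argument; with a placeholder token value, the message might look like:

  DE AD BE EF 32  // 4-byte token + argument (25 zigzag-encoded as 0x32)

Off-device tooling looks the token up in a database built from the
compiled binary and reconstructs "Battery level: 25".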
This commit includes the C and C++ code for tokenizing strings, as well
as a C++ library for decoding tokenized strings.
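A sketch of decoding with the C++ library (class and method names are
taken from detokenize.h and token_database.h in this commit; how the
token database is loaded is out of scope here):

  #include <cstdint>
  #include <string>

  #include "pw_span/span.h"
  #include "pw_tokenizer/detokenize.h"
  #include "pw_tokenizer/token_database.h"

  // Formats a tokenized message back into text. The database is
  // assumed to have been created elsewhere from the binary's tokens.
  std::string Decode(const pw::tokenizer::TokenDatabase& database,
                     pw::span<const uint8_t> encoded) {
    pw::tokenizer::Detokenizer detokenizer(database);
    // Looks up the token and formats the encoded arguments as text.
    return detokenizer.Detokenize(encoded).BestString();
  }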
Change-Id: I6d5737ab2d6dfdd76dcf70c852b547fdcd68d683
diff --git a/pw_tokenizer/BUILD.gn b/pw_tokenizer/BUILD.gn
new file mode 100644
index 0000000..5567a7a
--- /dev/null
+++ b/pw_tokenizer/BUILD.gn
@@ -0,0 +1,213 @@
+# Copyright 2020 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import("$dir_pw_docgen/docs.gni")
+import("$dir_pw_unit_test/test.gni")
+
+config("default_config") {
+ include_dirs = [ "public" ]
+}
+
+source_set("pw_tokenizer") {
+ public_configs = [
+ "$dir_pw_build:pw_default_cpp",
+ ":default_config",
+ ]
+ public_deps = [
+ "$dir_pw_preprocessor",
+ "$dir_pw_span",
+ ]
+ deps = [
+ "$dir_pw_varint",
+ ]
+ public = [
+ "public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h",
+ "public/pw_tokenizer/tokenize.h",
+ ]
+ sources = [
+ "public/pw_tokenizer/config.h",
+ "public/pw_tokenizer/internal/argument_types.h",
+ "public/pw_tokenizer/internal/argument_types_macro_4_byte.h",
+ "public/pw_tokenizer/internal/argument_types_macro_8_byte.h",
+ "public/pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_128_hash_macro.h",
+ "public/pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h",
+ "public/pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_96_hash_macro.h",
+ "public/pw_tokenizer/internal/tokenize_string.h",
+ "tokenize.cc",
+ ]
+ sources += public
+ friend = [
+ ":argument_types_test",
+ ":hash_test",
+ ]
+}
+
+source_set("decoder") {
+ public_configs = [
+ "$dir_pw_build:pw_default_cpp",
+ ":default_config",
+ ]
+ public_deps = [
+ "$dir_pw_span",
+ ]
+ deps = [
+ "$dir_pw_varint",
+ ]
+ public = [
+ "public/pw_tokenizer/detokenize.h",
+ "public/pw_tokenizer/token_database.h",
+ ]
+ sources = [
+ "decode.cc",
+ "detokenize.cc",
+ "public/pw_tokenizer/internal/decode.h",
+ "token_database.cc",
+ ]
+ sources += public
+ friend = [
+ ":decode_test",
+ ":generate_decoding_test_data",
+ ]
+}
+
+# Executable for generating test data for the C++ and Python detokenizers. This
+# target should only be built for the host.
+executable("generate_decoding_test_data") {
+ deps = [
+ ":decoder",
+ ":pw_tokenizer",
+ "$dir_pw_varint",
+ ]
+ sources = [
+ "generate_decoding_test_data.cc",
+ ]
+}
+
+pw_test_group("tests") {
+ tests = [
+ ":argument_types_test",
+ ":decode_test",
+ ":detokenize_test",
+ ":hash_test",
+ ":token_database_test",
+ ":tokenize_test",
+ ]
+ group_deps = [
+ "$dir_pw_preprocessor:tests",
+ "$dir_pw_span:tests",
+ "$dir_pw_status:tests",
+ ]
+}
+
+pw_test("argument_types_test") {
+ sources = [
+ "argument_types_test.c",
+ "argument_types_test.cc",
+ "pw_tokenizer_private/argument_types_test.h",
+ ]
+ deps = [
+ ":pw_tokenizer",
+ ]
+}
+
+pw_test("decode_test") {
+ sources = [
+ "decode_test.cc",
+ "pw_tokenizer_private/tokenized_string_decoding_test_data.h",
+ "pw_tokenizer_private/varint_decoding_test_data.h",
+ ]
+ deps = [
+ ":decoder",
+ "$dir_pw_varint",
+ ]
+}
+
+pw_test("detokenize_test") {
+ sources = [
+ "detokenize_test.cc",
+ ]
+ deps = [
+ ":decoder",
+ ]
+}
+
+pw_test("hash_test") {
+ sources = [
+ "hash_test.cc",
+ "pw_tokenizer_private/generated_hash_test_cases.h",
+ ]
+ deps = [
+ ":pw_tokenizer",
+ ]
+}
+
+pw_test("token_database_test") {
+ sources = [
+ "token_database_test.cc",
+ ]
+ deps = [
+ ":decoder",
+ ]
+}
+
+pw_test("tokenize_test") {
+ sources = [
+ "pw_tokenizer_private/tokenize_test.h",
+ "tokenize_test.c",
+ "tokenize_test.cc",
+ ]
+ deps = [
+ ":pw_tokenizer",
+ "$dir_pw_varint",
+ ]
+}
+
+declare_args() {
+ # pw_java_native_interface_include_dirs specifies the paths to use for
+ # building Java Native Interface libraries. If no paths are provided, targets
+ # that require JNI may not build correctly.
+ #
+ # Example JNI include paths for a Linux system:
+ #
+ # pw_java_native_interface_include_dirs = [
+ # "/usr/local/buildtools/java/jdk/include/",
+ # "/usr/local/buildtools/java/jdk/include/linux",
+ # ]
+ #
+ pw_java_native_interface_include_dirs = []
+}
+
+# Create a shared library for the tokenizer JNI wrapper. The include paths for
+# the JNI headers must be available in the system or provided with the
+# pw_java_native_interface_include_dirs variable.
+shared_library("detokenizer_jni") {
+ public_configs = [
+ "$dir_pw_build:pw_default_cpp",
+ ":default_config",
+ ]
+ include_dirs = pw_java_native_interface_include_dirs
+ sources = [
+ "java/dev/pigweed/tokenizer/detokenizer.cc",
+ ]
+ public_deps = [
+ ":decoder",
+ "$dir_pw_preprocessor",
+ ]
+}
+
+pw_doc_group("docs") {
+ sources = [
+ "docs.rst",
+ ]
+}