pw_log_tokenized: Python tooling for message metadata

- Add the FormatStringWithMetadata class for reading "■key♦value" data
  from log format strings.
- Add the line field to the Metadata class.
- Update Metadata bit field widths.

Change-Id: Id06fbccf2f0c496586b29ef5f6ad23c08fcfd806
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/47862
Commit-Queue: Wyatt Hepler <hepler@google.com>
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
Reviewed-by: Keir Mierle <keir@google.com>
diff --git a/pw_log_tokenized/py/BUILD.gn b/pw_log_tokenized/py/BUILD.gn
index f8ebd18..6c66e61 100644
--- a/pw_log_tokenized/py/BUILD.gn
+++ b/pw_log_tokenized/py/BUILD.gn
@@ -19,5 +19,6 @@
 pw_python_package("py") {
   setup = [ "setup.py" ]
   sources = [ "pw_log_tokenized/__init__.py" ]
+  tests = [ "format_string_test.py" ]
   pylintrc = "$dir_pigweed/.pylintrc"
 }
diff --git a/pw_log_tokenized/py/format_string_test.py b/pw_log_tokenized/py/format_string_test.py
new file mode 100644
index 0000000..b20c6a0
--- /dev/null
+++ b/pw_log_tokenized/py/format_string_test.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# Copyright 2021 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Tests decoding metadata from log strings."""
+
+import unittest
+
+from pw_log_tokenized import FormatStringWithMetadata
+
+
+class TestDecodeTokenized(unittest.TestCase):
+    """Tests FormatStringWithMetadata on '■key♦value' format strings."""
+    def test_all_fields(self):
+        parsed = FormatStringWithMetadata(
+            '■msg♦hello %d■file♦__FILE__■module♦log module name!')
+        self.assertEqual((parsed.message, parsed.module, parsed.file),
+                         ('hello %d', 'log module name!', '__FILE__'))
+
+    def test_different_fields(self):
+        """Covers an empty value, a missing field, and a custom key."""
+        parsed = FormatStringWithMetadata('■msg♦hello %d■module♦■THING♦abc123')
+        self.assertEqual((parsed.message, parsed.module, parsed.file),
+                         ('hello %d', '', ''))
+        self.assertEqual(parsed.fields['THING'], 'abc123')
+
+    def test_no_metadata(self):
+        """A string that does not start with a key is the whole message."""
+        parsed = FormatStringWithMetadata('a■msg♦not formatted correctly')
+        self.assertEqual(parsed.message, parsed.raw_string)
+        self.assertEqual((parsed.module, parsed.file), ('', ''))
+
+    def test_invalid_field_name(self):
+        """The digit-led '■1abc♦' is not a key, so it stays in msg."""
+        parsed = FormatStringWithMetadata('■msg♦M♦S♦G■1abc♦abc■other♦hi')
+        self.assertEqual(parsed.message, 'M♦S♦G■1abc♦abc')
+        self.assertEqual(parsed.fields['other'], 'hi')
+
+    def test_delimiters_in_value(self):
+        """Stray ■ and ♦ that do not form a key are kept in the values."""
+        parsed = FormatStringWithMetadata('■msg♦♦■♦■yo■module♦M♦DU■E')
+        self.assertEqual((parsed.message, parsed.module), ('♦■♦■yo', 'M♦DU■E'))
+
+
+if __name__ == '__main__':
+    unittest.main()  # Run this file's tests when it is executed directly.
diff --git a/pw_log_tokenized/py/pw_log_tokenized/__init__.py b/pw_log_tokenized/py/pw_log_tokenized/__init__.py
index 5c46b7b..3e1768b 100644
--- a/pw_log_tokenized/py/pw_log_tokenized/__init__.py
+++ b/pw_log_tokenized/py/pw_log_tokenized/__init__.py
@@ -14,6 +14,8 @@
 """Tools for working with tokenized logs."""
 
 from dataclasses import dataclass
+import re
+from typing import Dict, Mapping
 
 
 def _mask(value: int, start: int, count: int) -> int:
@@ -23,12 +25,13 @@
 
 @dataclass(frozen=True)
 class Metadata:
-    """Parses the metadata payload sent by pw_log_tokenized."""
+    """Parses the metadata payload used by pw_log_tokenized."""
     _value: int
 
-    log_bits: int = 6
+    log_bits: int = 3
     module_bits: int = 16
-    flag_bits: int = 10
+    flag_bits: int = 2
+    line_bits: int = 11
 
     def log_level(self) -> int:
         return _mask(self._value, 0, self.log_bits)
@@ -39,3 +42,39 @@
     def flags(self) -> int:
         return _mask(self._value, self.log_bits + self.module_bits,
                      self.flag_bits)
+
+    def line(self) -> int:
+        """Returns the line field; it starts after the level, module, flags."""
+        start = self.log_bits + self.module_bits + self.flag_bits
+        return _mask(self._value, start, self.line_bits)
+
+
+class FormatStringWithMetadata:
+    """Splits a '■key♦value■key♦value' log format string into its fields."""
+    _FIELD_KEY = re.compile(r'■([a-zA-Z]\w*)♦', flags=re.ASCII)
+
+    def __init__(self, string: str) -> None:
+        self.raw_string = string
+        self.fields: Dict[str, str] = {}
+        # Parse only key-prefixed strings; split() gives ['', key, value, ...].
+        if self._FIELD_KEY.match(string):
+            _, *parts = self._FIELD_KEY.split(string)
+            self.fields.update(zip(parts[::2], parts[1::2]))
+
+    @property
+    def message(self) -> str:
+        """The 'msg' field if present, otherwise the unmodified raw string."""
+        return self.fields.get('msg', self.raw_string)
+
+    @property
+    def module(self) -> str:
+        """The 'module' field, or an empty string if it is absent."""
+        return self.fields.get('module', '')
+
+    @property
+    def file(self) -> str:
+        """The 'file' field, or an empty string if it is absent."""
+        return self.fields.get('file', '')
+
+    def __repr__(self) -> str:
+        return self.message  # The repr is the message text itself.