[util] Fix extract_sw_logs.py

Fixes #14636.
Fixes issue encountered in #14858.

Rather than being told which ELF sections to parse and generate a database
from, the script now reads ALL valid sections from the ELF and creates a
database of address and string found at that address. This eliminates the
need to pass sections using the `--rodata-sections` arg in the bazel rules.
This takes care of string constants that reside in other sections (for
example, the `chip_info` which resides in its own `.chip_info` section).

The other bug was in the `get_str_at_addr()` method, which returns
a string (or substring) starting at a particular address. This computation
requires the original length of the string to be known. Since the strings
go through sanitization, the orignal length may change, leading to errors
thrown when printing single whitespace characters, such as
`LOG_INFO("\n")`. The change records the original length of the string
along with the string (as a tuple) at each address.

Signed-off-by: Srikrishna Iyer <sriyer@google.com>
diff --git a/rules/opentitan.bzl b/rules/opentitan.bzl
index 9e124f5..6398195 100644
--- a/rules/opentitan.bzl
+++ b/rules/opentitan.bzl
@@ -489,8 +489,6 @@
             arguments = [
                 "--elf-file",
                 src.path,
-                "--rodata-sections",
-                ".rodata",
                 "--logs-fields-section",
                 ".logs.fields",
                 "--name",
diff --git a/sw/device/lib/testing/test_framework/ottf_common.ld b/sw/device/lib/testing/test_framework/ottf_common.ld
index 25cb647..34c6e14 100644
--- a/sw/device/lib/testing/test_framework/ottf_common.ld
+++ b/sw/device/lib/testing/test_framework/ottf_common.ld
@@ -152,10 +152,6 @@
     KEEP(*(__llvm_covfun))
     KEEP(*(__llvm_covmap))
     KEEP(*(__llvm_prf_names))
-
-    /* TODO(#14636): extract_sw_logs_db requires .rodata section. */
-    . = ALIGN(4);
-    _rodata_end = .;
   } > ottf_flash
 
   /**
diff --git a/util/device_sw_utils/extract_sw_logs.py b/util/device_sw_utils/extract_sw_logs.py
index e18c0ee..1c1eb92 100755
--- a/util/device_sw_utils/extract_sw_logs.py
+++ b/util/device_sw_utils/extract_sw_logs.py
@@ -86,7 +86,7 @@
 
     It is assumed that _format has been passed through `cleanup_format()`.
     '''
-    pattern = '''
+    pattern = r'''
          %                    # literal "%"
          (?:[-+0 #]{0,5})     # optional flags
          (?:\d+|\*)?          # width
@@ -102,8 +102,10 @@
     index = 0
     result = []
     for match in m:
-        if match[1] == '%': continue
-        if match[0] == 's': result.append(str(index))
+        if match[1] == '%':
+            continue
+        if match[0] == 's':
+            result.append(str(index))
         index += 1
     return ' '.join(result).strip()
 
@@ -120,15 +122,17 @@
 
     This function processes the read-only sections of the elf supplied as
     a list of ro_content tuples comprising of base addr, size and data in bytes
-    and converts it into an {addr: string} dict which is returned.'''
+    and converts it into an {addr: (string, length} dict which is returned.
+    We preserve the original length of the string becuase the string may
+    go through cleanup methods which will alter it.'''
     result = {}
     for ro_content in ro_contents:
         str_start = 0
         base_addr, size, data = ro_content
         while (str_start < size):
             str_end = data.find(b'\0', str_start)
-            # Skip the remainder of this section since it can't contain any C-strings if
-            # there are no nul bytes.
+            # Skip the remainder of this section since it can't contain any
+            # C-strings if there are no null bytes.
             if str_end == -1:
                 break
             # Skip if start and end is the same
@@ -137,6 +141,7 @@
                 continue
             # Get full string address by adding base addr to the start.
             addr = base_addr + str_start
+            length = str_end - str_start
             string = cleanup_newlines(data[str_start:str_end].decode(
                 'utf-8', errors='replace'))
             if addr in result:
@@ -144,7 +149,7 @@
                 exc_msg += "addr: {} string: {}\n".format(addr, result[addr])
                 exc_msg += "addr: {} string: {}\n".format(addr, string)
                 raise IndexError(exc_msg)
-            result[addr] = string
+            result[addr] = (string, length)
             str_start = str_end + 1
     return result
 
@@ -155,12 +160,13 @@
     It may be possible that the input addr is an offset within the string.
     If true, then it returns remainder of the string starting at the offset.'''
     for addr in addr_strings.keys():
-        if addr <= str_addr < addr + len(addr_strings[addr]):
-            return addr_strings[addr][str_addr - addr:].strip()
-    raise KeyError("string at addr {} not found".format(str_addr))
+        string, length = addr_strings[addr]
+        if addr <= str_addr < addr + length:
+            return string[str_addr - addr:].strip()
+    raise KeyError(f"string at addr {str_addr:x} not found")
 
 
-def extract_sw_logs(elf_file, logs_fields_section, ro_sections):
+def extract_sw_logs(elf_file, logs_fields_section):
     '''This function extracts contents from the logs fields section, and the
     read only sections, processes them and generates a tuple of (results) -
     log with fields and (rodata) - constant strings with their addresses.
@@ -168,26 +174,33 @@
     # Open the elf file.
     with open(elf_file, 'rb') as f:
         elf = elffile.ELFFile(f)
-        # Parse the ro sections to get {addr: string} pairs.
         ro_contents = []
-        for ro_section in ro_sections:
-            section = elf.get_section_by_name(name=ro_section)
-            if section:
-                base_addr = int(section.header['sh_addr'])
-                size = int(section.header['sh_size'])
-                data = section.data()
-                ro_contents.append((base_addr, size, data))
-            else:
-                print("Error: {} section not found in {}".format(
-                    ro_section, elf_file))
-                sys.exit(1)
+        for section_idx in range(elf.num_sections()):
+            section = elf.get_section(section_idx)
+            # Only consider sections stored in the image.
+            if section.header['sh_type'] != "SHT_PROGBITS":
+                continue
+
+            # Ignore the logs fields section.
+            if section.name == logs_fields_section:
+                continue
+
+            # Ignore the debug sections.
+            if section.name.startswith(".debug"):
+                continue
+
+            base_addr = int(section.header['sh_addr'])
+            size = int(section.header['sh_size'])
+            data = section.data()
+            ro_contents.append((base_addr, size, data))
+
         addr_strings = get_addr_strings(ro_contents)
 
         # Dump the {addr: string} data.
         rodata = ""
         for addr in addr_strings.keys():
             rodata += "addr: {}\n".format(hex(addr)[2:])
-            string = cleanup_newlines(addr_strings[addr])
+            string, _ = addr_strings[addr]
             rodata += "string: {}\n".format(string)
 
         # Parse the logs fields section to extract the logs.
@@ -230,11 +243,6 @@
                         '-f',
                         default=LOGS_FIELDS_SECTION,
                         help="Elf section where log fields are written.")
-    parser.add_argument('--rodata-sections',
-                        '-r',
-                        nargs="+",
-                        action="append",
-                        help="Elf sections with rodata.")
     parser.add_argument('--name',
                         '-n',
                         required=True,
@@ -245,19 +253,8 @@
                         help="Output directory.")
     args = parser.parse_args()
 
-    if args.rodata_sections is None:
-        ro_sections = [RODATA_SECTION]
-    else:
-        # TODO: We want the `--rodata-sections` arg to have the 'extend' action
-        # which is only available in Python 3.8. To maintain compatibility with
-        # Python 3.6 (which is the minimum required version for OpenTitan), we
-        # flatten the list here instead.
-        ro_sections = list(
-            set([section for lst in args.rodata_sections for section in lst]))
-
     os.makedirs(args.outdir, exist_ok=True)
-    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section,
-                                     ro_sections)
+    rodata, result = extract_sw_logs(args.elf_file, args.logs_fields_section)
 
     outfile = os.path.join(args.outdir, args.name + ".rodata.txt")
     with open(outfile, "w", encoding='utf-8') as f: