hw/ip/rom_ctrl/util/mem.py - 3p/lowrisc/opentitan - Git at Google

 #!/usr/bin/env python3
 # Copyright lowRISC contributors.
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0

 import random
 import re
 import subprocess
 import tempfile
 from typing import BinaryIO, IO, List, Optional, TextIO, Tuple

 from elftools.elf.elffile import ELFFile  # type: ignore


 def red_xor32(word: int) -> int:
     '''Reduction XOR for a uint32'''
     word = (word & 0xffff) ^ (word >> 16)
     word = (word & 0xff) ^ (word >> 8)
     word = (word & 0xf) ^ (word >> 4)
     word = (word & 0x3) ^ (word >> 2)
     return (word & 0x1) ^ (word >> 1)


 def add_ecc32(word: int) -> int:
     '''Add Hsiao (39,32) ECC bits to a 32-bit unsigned word'''
     assert 0 <= word < (1 << 32)
     b0 = red_xor32(word ^ 0x00850e56a2) << 32
     b1 = red_xor32(word ^ 0x002e534c61) << 33
     b2 = red_xor32(word ^ 0x000901a9fe) << 34
     b3 = red_xor32(word ^ 0x007079a702) << 35
     b4 = red_xor32(word ^ 0x00caba900d) << 36
     b5 = red_xor32(word ^ 0x00d3c44b18) << 37
     b6 = red_xor32(word ^ 0x0034a430d5) << 38
     return word | b0 | b1 | b2 | b3 | b4 | b5 | b6


 class MemChunk:
     def __init__(self, base_addr: int, words: List[int]):
         '''A contiguous list of words starting at base_addr'''
         self.base_addr = base_addr
         self.words = words

     def __str__(self) -> str:
         return ('MemChunk(@{:#x}, words_len={})'
                 .format(self.base_addr, len(self.words)))

     def next_addr(self) -> int:
         '''Get the address directly above the chunk'''
         return self.base_addr + len(self.words)

     def write_vmem(self, width: int, outfile: TextIO) -> None:
         '''Write this chunk as one or more lines to outfile

         width is the maximum width of a word in bits.

         '''
         addr_chars = max(8, (self.next_addr().bit_length() + 3) // 4)
         word_chars = (width + 3) // 4

         # Try to wrap at 79 characters. To do this, pick a number of words so
         # that addr_chars + num_words * (word_chars + 1) fits (note that we
         # gain a character by adding a @ on the front of the address, but lose
         # it again by omitting the trailing space after the last word).
         nwords_on_line = max(1, (79 - addr_chars) // (1 + word_chars))
         for start_idx in range(0, len(self.words), nwords_on_line):
             line_addr = self.base_addr + start_idx
             toks = [f'@{line_addr:0{addr_chars}X}']
             for word in self.words[start_idx:start_idx + nwords_on_line]:
                 toks.append(f'{word:0{word_chars}X}')
             outfile.write(' '.join(toks) + '\n')

     def add_ecc32(self) -> None:
         '''Add ECC32 integrity bits

         This extends the input words (which are assumed to be 32-bit) by 7
         bits, to make 39-bit words.

         '''
         self.words = [add_ecc32(w) for w in self.words]


 class MemFile:
     def __init__(self, width: int, chunks: List[MemChunk]):
         self.width = width
         self.chunks = chunks

     def __str__(self) -> str:
         return ('MemFile(width={}, chunks_len={})'
                 .format(self.width, len(self.chunks)))

     @staticmethod
     def _parse_line(width: int, line: str) -> Tuple[int, List[int]]:
         '''Parse a line from a preprocessed vmem file

         Returns a pair (addr, words) where addr is the address at the start of
         the line and words is a list of the words that have been found, parsed
         to unsigned numbers. Assumes that line has at least one non-whitespace
         character. Each word is checked to make sure that it fits in width
         bits.

         '''
         tokens = line.split()
         assert tokens

         addr_match = re.match(r'@([0-9a-fA-F]+)$', tokens[0])
         if addr_match is None:
             raise ValueError('Bad line format: first token is {!r}, '
                              'which is not in the right format for an address.'
                              .format(tokens[0]))
         addr = int(addr_match.group(1), 16)

         words = []
         for idx, word_tok in enumerate(tokens[1:]):
             try:
                 word = int(word_tok, 16)
             except ValueError:
                 raise ValueError('Word {} of the line is invalid: '
                                  '{!r} is not a hex number.'
                                  .format(idx + 1, word_tok)) from None

             if word < 0 or word >> width:
                 raise ValueError('Word {} of the line is {!r}, which '
                                  'does not fit in an unsigned {}-bit number.'
                                  .format(idx + 1, word_tok, width))
             words.append(word)

         return (addr, words)

     @staticmethod
     def _load_preproc(width: int, infile: IO[str]) -> 'MemFile':
         '''Load a pre-processed file'''
         chunks = []
         next_chunk = None  # type: Optional[MemChunk]
         for line in infile:
             # If the line is empty or whitespace, skip it.
             if not line or line.isspace():
                 continue

             line_addr, line_words = MemFile._parse_line(width, line)

             # If there aren't actually any words on the line, skip it.
             if not line_words:
                 continue

             if next_chunk is None:
                 next_chunk = MemChunk(line_addr, line_words)
                 continue

             # Glue the line onto the current chunk if there's no gap
             chunk_end = next_chunk.next_addr()
             if line_addr < chunk_end:
                 raise ValueError("Cannot read data starting at {:#x}: "
                                  "we're already at {:#x}, so this would "
                                  "go backwards."
                                  .format(line_addr, chunk_end))
             if line_addr == chunk_end:
                 next_chunk.words += line_words
                 continue

             # If we're here, there's a gap between the current chunk and
             # line_addr.
             chunks.append(next_chunk)
             next_chunk = MemChunk(line_addr, line_words)

         if next_chunk is not None:
             chunks.append(next_chunk)

         return MemFile(width, chunks)

     @staticmethod
     def load_vmem(width: int, infile: TextIO) -> 'MemFile':
         '''Read a VMEM file

         This assumes that all words fit in the given width.

         '''
         with tempfile.TemporaryFile('w+') as tmp:
             # First, run cpp as a subprocess to strip out any comments. These
             # are allowed by the vmem format as described in srec_vmem(5) and
             # tokenising them is hard: get the C preprocessor to do the work
             # for us! The -P argument tells cpp not to generate linemarkers
             subprocess.run(['cpp', '-P'], stdin=infile, stdout=tmp, check=True)
             tmp.seek(0)
             return MemFile._load_preproc(width, tmp)

     @staticmethod
     def load_elf32(infile: BinaryIO, base_addr: int) -> 'MemFile':
         '''Read a little-endian 32-bit ELF file'''
         elf_file = ELFFile(infile)
         segments = []  # type: List[Tuple[int, int, bytes]]
         for segment in elf_file.iter_segments():
             seg_type = segment['p_type']

             # We're only interested in nonempty PT_LOAD segments
             if seg_type != 'PT_LOAD' or segment['p_memsz'] == 0:
                 continue

             # seg_lma is the (relative) address of the first byte to be loaded.
             # seg_top is the address of the last byte to be loaded. A one-byte
             # segment will have seg_lma == seg_top.
             seg_lma = segment['p_paddr'] - base_addr
             seg_top = seg_lma + segment['p_memsz'] - 1

             assert seg_lma <= seg_top

             # We re-map the addresses relative to base_addr: check that no
             # segment starts before it.
             if seg_lma < 0:
                 raise ValueError('ELF file contains a segment starting at '
                                  '{:#x}, so cannot be loaded relative to base '
                                  'address {:#x}.'
                                  .format(base_addr + seg_lma, base_addr))

             segments.append((seg_lma, seg_top, segment.data()))

         # Sort the segments by base address
         segments.sort(key=lambda t: t[0])

         # Make sure that they don't overlap
         prev_lma = 0
         next_addr = 0
         for lma, top, data in segments:
             if lma < next_addr:
                 raise ValueError('ELF file contains overlapping segments with '
                                  'address ranges {:#x}..{:#x} and '
                                  '{:#x}..{:#x}.'
                                  .format(base_addr + prev_lma,
                                          base_addr + next_addr - 1,
                                          base_addr + lma,
                                          base_addr + top))
             prev_lma = lma
             next_addr = top + 1

         # Merge any adjacent segments, bridging any sub-word gaps. This doesn't
         # do any other right padding: we'll do that on the final pass that
         # converts to 32-bit words.
         merged_segments = []  # type: List[Tuple[int, int, bytes]]
         next_word = 0
         for lma, top, data in segments:
             # Round the LMA down to the previous word boundary. The non-overlap
             # check above should ensure that this is never actually less than
             # next_word.
             lma_word = lma // 4
             assert next_word <= lma_word

             # If there isn't an aligned whole word between the two segments,
             # bridge the gap
             if merged_segments and next_word == lma_word:
                 last_lma_word, last_top, last_data = merged_segments[-1]
                 if last_top < lma:
                     # The largest gap possible here happens with addresses like
                     # last_top = 0x100; lma = 0x107, which just bridges two
                     # 4-byte words (0x100..0x103 and 0x104..0x107) with one
                     # byte used from each, leaving 6 bytes to fill.
                     assert lma - (last_top + 1) <= 6
                     last_data += bytes(lma - (last_top + 1))
                 merged_segments[-1] = (last_lma_word, top, last_data + data)
             else:
                 # Pad on the left if necessary to ensure that lma is 32-bit
                 # aligned.
                 if lma % 4:
                     merged_segments.append((lma_word, top, bytes(lma % 4) + data))
                 else:
                     merged_segments.append((lma_word, top, data))

             # The index of the first word that starts strictly above top.
             next_word = 1 + (top // 4)

         # Assemble the bytes in each segment into little-endian 32-bit words.
         # Zero-extend any partial word at the end of a segment. Because of the
         # merging in the previous pass, we know this won't cause any overlaps.
         chunks = []  # type: List[MemChunk]
         for lma_word, _, data in merged_segments:
             words = []
             word = 0
             for idx, byte in enumerate(data):
                 shift = 8 * (idx % 4)
                 word |= byte << shift
                 if idx % 4 == 3:
                     words.append(word)
                     word = 0
             # idx here will be the index of the last byte. If data ended with a
             # partial word, idx will be something other than 3 mod 4.
             if idx % 4 != 3:
                 words.append(word)

             chunks.append(MemChunk(lma_word, words))

         return MemFile(32, chunks)

     def next_addr(self) -> int:
         '''Get the address directly above the top of the MemFile'''
         return 0 if not self.chunks else self.chunks[-1].next_addr()

     def write_vmem(self, outfile: TextIO) -> None:
         '''Write data to a VMEM file'''
         for chunk in self.chunks:
             chunk.write_vmem(self.width, outfile)

     def flatten(self, size: int, rnd_seed: int) -> 'MemFile':
         '''Flatten into a single chunk, padding with pseudo-random data

         As well as padding between the chunks, this expands the result up to
         size words by adding padding after the last chunk if necessary.

         '''
         assert self.next_addr() <= size

         old_rnd_state = random.getstate()
         random.seed(rnd_seed)

         try:
             acc = MemChunk(0, [])
             # Add each chunk
             for chunk in self.chunks:
                 acc_end = acc.next_addr()
                 assert acc_end <= chunk.base_addr

                 # If there's a gap before the chunk, insert some random bits
                 padding_len = chunk.base_addr - acc_end
                 if padding_len:
                     acc.words += [random.getrandbits(32)
                                   for _ in range(padding_len)]

                 assert acc.next_addr() == chunk.base_addr
                 acc.words += chunk.words

             acc_end = acc.next_addr()
             assert acc_end == self.next_addr()

             # If there's a gap after the last chunk, insert some more random
             # bits
             padding_len = size - acc_end
             if padding_len:
                 acc.words += [random.getrandbits(32)
                               for _ in range(padding_len)]

             assert acc.next_addr() == size

             return MemFile(self.width, [acc])
         finally:
             random.setstate(old_rnd_state)

     def add_ecc32(self) -> None:
         '''Add ECC32 integrity bits

         This extends the input words (which are assumed to be 32-bit) by 7
         bits, to make 39-bit words.

         '''
         assert self.width <= 32
         for chunk in self.chunks:
             chunk.add_ecc32()
         self.width = 39
	#!/usr/bin/env python3
	# Copyright lowRISC contributors.
	# Licensed under the Apache License, Version 2.0, see LICENSE for details.
	# SPDX-License-Identifier: Apache-2.0

	import random
	import re
	import subprocess
	import tempfile
	from typing import BinaryIO, IO, List, Optional, TextIO, Tuple

	from elftools.elf.elffile import ELFFile # type: ignore


	def red_xor32(word: int) -> int:
	'''Reduction XOR for a uint32'''
	word = (word & 0xffff) ^ (word >> 16)
	word = (word & 0xff) ^ (word >> 8)
	word = (word & 0xf) ^ (word >> 4)
	word = (word & 0x3) ^ (word >> 2)
	return (word & 0x1) ^ (word >> 1)


	def add_ecc32(word: int) -> int:
	'''Add Hsiao (39,32) ECC bits to a 32-bit unsigned word'''
	assert 0 <= word < (1 << 32)
	b0 = red_xor32(word ^ 0x00850e56a2) << 32
	b1 = red_xor32(word ^ 0x002e534c61) << 33
	b2 = red_xor32(word ^ 0x000901a9fe) << 34
	b3 = red_xor32(word ^ 0x007079a702) << 35
	b4 = red_xor32(word ^ 0x00caba900d) << 36
	b5 = red_xor32(word ^ 0x00d3c44b18) << 37
	b6 = red_xor32(word ^ 0x0034a430d5) << 38
	return word \| b0 \| b1 \| b2 \| b3 \| b4 \| b5 \| b6


	class MemChunk:
	def __init__(self, base_addr: int, words: List[int]):
	'''A contiguous list of words starting at base_addr'''
	self.base_addr = base_addr
	self.words = words

	def __str__(self) -> str:
	return ('MemChunk(@{:#x}, words_len={})'
	.format(self.base_addr, len(self.words)))

	def next_addr(self) -> int:
	'''Get the address directly above the chunk'''
	return self.base_addr + len(self.words)

	def write_vmem(self, width: int, outfile: TextIO) -> None:
	'''Write this chunk as one or more lines to outfile

	width is the maximum width of a word in bits.

	'''
	addr_chars = max(8, (self.next_addr().bit_length() + 3) // 4)
	word_chars = (width + 3) // 4

	# Try to wrap at 79 characters. To do this, pick a number of words so
	# that addr_chars + num_words * (word_chars + 1) fits (note that we
	# gain a character by adding a @ on the front of the address, but lose
	# it again by omitting the trailing space after the last word).
	nwords_on_line = max(1, (79 - addr_chars) // (1 + word_chars))
	for start_idx in range(0, len(self.words), nwords_on_line):
	line_addr = self.base_addr + start_idx
	toks = [f'@{line_addr:0{addr_chars}X}']
	for word in self.words[start_idx:start_idx + nwords_on_line]:
	toks.append(f'{word:0{word_chars}X}')
	outfile.write(' '.join(toks) + '\n')

	def add_ecc32(self) -> None:
	'''Add ECC32 integrity bits

	This extends the input words (which are assumed to be 32-bit) by 7
	bits, to make 39-bit words.

	'''
	self.words = [add_ecc32(w) for w in self.words]


	class MemFile:
	def __init__(self, width: int, chunks: List[MemChunk]):
	self.width = width
	self.chunks = chunks

	def __str__(self) -> str:
	return ('MemFile(width={}, chunks_len={})'
	.format(self.width, len(self.chunks)))

	@staticmethod
	def _parse_line(width: int, line: str) -> Tuple[int, List[int]]:
	'''Parse a line from a preprocessed vmem file

	Returns a pair (addr, words) where addr is the address at the start of
	the line and words is a list of the words that have been found, parsed
	to unsigned numbers. Assumes that line has at least one non-whitespace
	character. Each word is checked to make sure that it fits in width
	bits.

	'''
	tokens = line.split()
	assert tokens

	addr_match = re.match(r'@([0-9a-fA-F]+)$', tokens[0])
	if addr_match is None:
	raise ValueError('Bad line format: first token is {!r}, '
	'which is not in the right format for an address.'
	.format(tokens[0]))
	addr = int(addr_match.group(1), 16)

	words = []
	for idx, word_tok in enumerate(tokens[1:]):
	try:
	word = int(word_tok, 16)
	except ValueError:
	raise ValueError('Word {} of the line is invalid: '
	'{!r} is not a hex number.'
	.format(idx + 1, word_tok)) from None

	if word < 0 or word >> width:
	raise ValueError('Word {} of the line is {!r}, which '
	'does not fit in an unsigned {}-bit number.'
	.format(idx + 1, word_tok, width))
	words.append(word)

	return (addr, words)

	@staticmethod
	def _load_preproc(width: int, infile: IO[str]) -> 'MemFile':
	'''Load a pre-processed file'''
	chunks = []
	next_chunk = None # type: Optional[MemChunk]
	for line in infile:
	# If the line is empty or whitespace, skip it.
	if not line or line.isspace():
	continue

	line_addr, line_words = MemFile._parse_line(width, line)

	# If there aren't actually any words on the line, skip it.
	if not line_words:
	continue

	if next_chunk is None:
	next_chunk = MemChunk(line_addr, line_words)
	continue

	# Glue the line onto the current chunk if there's no gap
	chunk_end = next_chunk.next_addr()
	if line_addr < chunk_end:
	raise ValueError("Cannot read data starting at {:#x}: "
	"we're already at {:#x}, so this would "
	"go backwards."
	.format(line_addr, chunk_end))
	if line_addr == chunk_end:
	next_chunk.words += line_words
	continue

	# If we're here, there's a gap between the current chunk and
	# line_addr.
	chunks.append(next_chunk)
	next_chunk = MemChunk(line_addr, line_words)

	if next_chunk is not None:
	chunks.append(next_chunk)

	return MemFile(width, chunks)

	@staticmethod
	def load_vmem(width: int, infile: TextIO) -> 'MemFile':
	'''Read a VMEM file

	This assumes that all words fit in the given width.

	'''
	with tempfile.TemporaryFile('w+') as tmp:
	# First, run cpp as a subprocess to strip out any comments. These
	# are allowed by the vmem format as described in srec_vmem(5) and
	# tokenising them is hard: get the C preprocessor to do the work
	# for us! The -P argument tells cpp not to generate linemarkers
	subprocess.run(['cpp', '-P'], stdin=infile, stdout=tmp, check=True)
	tmp.seek(0)
	return MemFile._load_preproc(width, tmp)

	@staticmethod
	def load_elf32(infile: BinaryIO, base_addr: int) -> 'MemFile':
	'''Read a little-endian 32-bit ELF file'''
	elf_file = ELFFile(infile)
	segments = [] # type: List[Tuple[int, int, bytes]]
	for segment in elf_file.iter_segments():
	seg_type = segment['p_type']

	# We're only interested in nonempty PT_LOAD segments
	if seg_type != 'PT_LOAD' or segment['p_memsz'] == 0:
	continue

	# seg_lma is the (relative) address of the first byte to be loaded.
	# seg_top is the address of the last byte to be loaded. A one-byte
	# segment will have seg_lma == seg_top.
	seg_lma = segment['p_paddr'] - base_addr
	seg_top = seg_lma + segment['p_memsz'] - 1

	assert seg_lma <= seg_top

	# We re-map the addresses relative to base_addr: check that no
	# segment starts before it.
	if seg_lma < 0:
	raise ValueError('ELF file contains a segment starting at '
	'{:#x}, so cannot be loaded relative to base '
	'address {:#x}.'
	.format(base_addr + seg_lma, base_addr))

	segments.append((seg_lma, seg_top, segment.data()))

	# Sort the segments by base address
	segments.sort(key=lambda t: t[0])

	# Make sure that they don't overlap
	prev_lma = 0
	next_addr = 0
	for lma, top, data in segments:
	if lma < next_addr:
	raise ValueError('ELF file contains overlapping segments with '
	'address ranges {:#x}..{:#x} and '
	'{:#x}..{:#x}.'
	.format(base_addr + prev_lma,
	base_addr + next_addr - 1,
	base_addr + lma,
	base_addr + top))
	prev_lma = lma
	next_addr = top + 1

	# Merge any adjacent segments, bridging any sub-word gaps. This doesn't
	# do any other right padding: we'll do that on the final pass that
	# converts to 32-bit words.
	merged_segments = [] # type: List[Tuple[int, int, bytes]]
	next_word = 0
	for lma, top, data in segments:
	# Round the LMA down to the previous word boundary. The non-overlap
	# check above should ensure that this is never actually less than
	# next_word.
	lma_word = lma // 4
	assert next_word <= lma_word

	# If there isn't an aligned whole word between the two segments,
	# bridge the gap
	if merged_segments and next_word == lma_word:
	last_lma_word, last_top, last_data = merged_segments[-1]
	if last_top < lma:
	# The largest gap possible here happens with addresses like
	# last_top = 0x100; lma = 0x107, which just bridges two
	# 4-byte words (0x100..0x103 and 0x104..0x107) with one
	# byte used from each, leaving 6 bytes to fill.
	assert lma - (last_top + 1) <= 6
	last_data += bytes(lma - (last_top + 1))
	merged_segments[-1] = (last_lma_word, top, last_data + data)
	else:
	# Pad on the left if necessary to ensure that lma is 32-bit
	# aligned.
	if lma % 4:
	merged_segments.append((lma_word, top, bytes(lma % 4) + data))
	else:
	merged_segments.append((lma_word, top, data))

	# The index of the first word that starts strictly above top.
	next_word = 1 + (top // 4)

	# Assemble the bytes in each segment into little-endian 32-bit words.
	# Zero-extend any partial word at the end of a segment. Because of the
	# merging in the previous pass, we know this won't cause any overlaps.
	chunks = [] # type: List[MemChunk]
	for lma_word, _, data in merged_segments:
	words = []
	word = 0
	for idx, byte in enumerate(data):
	shift = 8 * (idx % 4)
	word \|= byte << shift
	if idx % 4 == 3:
	words.append(word)
	word = 0
	# idx here will be the index of the last byte. If data ended with a
	# partial word, idx will be something other than 3 mod 4.
	if idx % 4 != 3:
	words.append(word)

	chunks.append(MemChunk(lma_word, words))

	return MemFile(32, chunks)

	def next_addr(self) -> int:
	'''Get the address directly above the top of the MemFile'''
	return 0 if not self.chunks else self.chunks[-1].next_addr()

	def write_vmem(self, outfile: TextIO) -> None:
	'''Write data to a VMEM file'''
	for chunk in self.chunks:
	chunk.write_vmem(self.width, outfile)

	def flatten(self, size: int, rnd_seed: int) -> 'MemFile':
	'''Flatten into a single chunk, padding with pseudo-random data

	As well as padding between the chunks, this expands the result up to
	size words by adding padding after the last chunk if necessary.

	'''
	assert self.next_addr() <= size

	old_rnd_state = random.getstate()
	random.seed(rnd_seed)

	try:
	acc = MemChunk(0, [])
	# Add each chunk
	for chunk in self.chunks:
	acc_end = acc.next_addr()
	assert acc_end <= chunk.base_addr

	# If there's a gap before the chunk, insert some random bits
	padding_len = chunk.base_addr - acc_end
	if padding_len:
	acc.words += [random.getrandbits(32)
	for _ in range(padding_len)]

	assert acc.next_addr() == chunk.base_addr
	acc.words += chunk.words

	acc_end = acc.next_addr()
	assert acc_end == self.next_addr()

	# If there's a gap after the last chunk, insert some more random
	# bits
	padding_len = size - acc_end
	if padding_len:
	acc.words += [random.getrandbits(32)
	for _ in range(padding_len)]

	assert acc.next_addr() == size

	return MemFile(self.width, [acc])
	finally:
	random.setstate(old_rnd_state)

	def add_ecc32(self) -> None:
	'''Add ECC32 integrity bits

	This extends the input words (which are assumed to be 32-bit) by 7
	bits, to make 39-bit words.

	'''
	assert self.width <= 32
	for chunk in self.chunks:
	chunk.add_ecc32()
	self.width = 39