[otbn] Factor KnownMem out of Model.py in RIG
No functional change, but this was getting a little unwieldy.
Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/util/rig/known_mem.py b/hw/ip/otbn/util/rig/known_mem.py
new file mode 100644
index 0000000..a711983
--- /dev/null
+++ b/hw/ip/otbn/util/rig/known_mem.py
@@ -0,0 +1,342 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import random
+from typing import List, Optional, Tuple
+
+
+def _extended_euclidean_algorithm(a: int, b: int) -> Tuple[int, int, int]:
+ '''The extended Euclidean algorithm.
+
+ Returns a tuple (r, s, t) so that gcd is the GCD of the two inputs and r =
+ a s + b t.
+
+ '''
+ r, r_nxt = a, b
+ s, s_nxt = 1, 0
+ t, t_nxt = 0, 1
+
+ while r_nxt:
+ q = r // r_nxt
+ r, r_nxt = r_nxt, r - q * r_nxt
+ s, s_nxt = s_nxt, s - q * s_nxt
+ t, t_nxt = t_nxt, t - q * t_nxt
+
+ # If both inputs are non-positive, the result comes out negative and we
+ # should flip all the signs.
+ if r < 0:
+ r, s, t = - r, - s, - t
+
+ return (r, s, t)
+
+
+def _intersect_ranges(a: List[Tuple[int, int]],
+ b: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
+ ret = []
+ paired = ([(r, False) for r in a] + [(r, True) for r in b])
+ arng = None # type: Optional[Tuple[int, int]]
+ brng = None # type: Optional[Tuple[int, int]]
+ for (lo, hi), is_b in sorted(paired):
+ if is_b:
+ if arng is not None:
+ a0, a1 = arng
+ if a0 <= hi and lo <= a1:
+ ret.append((max(a0, lo), min(a1, hi)))
+ brng = (lo, hi)
+ else:
+ if brng is not None:
+ b0, b1 = brng
+ if b0 <= hi and lo <= b1:
+ ret.append((max(lo, b0), min(hi, b1)))
+ arng = (lo, hi)
+ return ret
+
+
+class KnownMem:
+ '''A representation of what memory/CSRs have architectural values'''
+ def __init__(self, top_addr: int):
+ assert top_addr > 0
+
+ self.top_addr = top_addr
+ # A list of pairs of addresses. If the pair (lo, hi) is in the list
+ # then each byte in the address range {lo..hi - 1} has a known value.
+ self.known_ranges = [] # type: List[Tuple[int, int]]
+
+ def copy(self) -> 'KnownMem':
+ '''Return a shallow copy of the object'''
+ ret = KnownMem(self.top_addr)
+ ret.known_ranges = self.known_ranges.copy()
+ return ret
+
+ def merge(self, other: 'KnownMem') -> None:
+ '''Merge in values from another KnownMem object'''
+ assert self.top_addr == other.top_addr
+ self.known_ranges = _intersect_ranges(self.known_ranges,
+ other.known_ranges)
+
+ def touch_range(self, base: int, width: int) -> None:
+ '''Mark {base .. base + width - 1} as known'''
+ assert 0 <= width
+ assert 0 <= base <= self.top_addr - width
+ for off in range(width):
+ self.touch_addr(base + off)
+
+ def touch_addr(self, addr: int) -> None:
+ '''Mark word starting at addr as known'''
+ assert 0 <= addr < self.top_addr
+
+ # Find the index of the last range that starts below us, if there is
+ # one, and the index of the first range that starts above us, if there
+ # is one.
+ last_idx_below = None
+ first_idx_above = None
+ for idx, (lo, hi) in enumerate(self.known_ranges):
+ if lo <= addr:
+ last_idx_below = idx
+ continue
+
+ first_idx_above = idx
+ break
+
+ # Are we below all other ranges?
+ if last_idx_below is None:
+ # Are we one address below the next range above? In which case, we
+ # need to shuffle it back one.
+ if first_idx_above is not None:
+ lo, hi = self.known_ranges[first_idx_above]
+ assert addr < lo
+ if addr == lo - 1:
+ self.known_ranges[first_idx_above] = (lo - 1, hi)
+ return
+
+ # Otherwise, we're disjoint. Add a one-element range at the start.
+ self.known_ranges = [(addr, addr + 1)] + self.known_ranges
+ return
+
+ # If not, are we inside a range? In that case, there's nothing to do.
+ left_lo, left_hi = self.known_ranges[last_idx_below]
+ if addr < left_hi:
+ return
+
+ left = self.known_ranges[:last_idx_below]
+
+ # Are we just above it?
+ if addr == left_hi:
+ # If there is no range above, we can just extend the last range by one.
+ if first_idx_above is None:
+ self.known_ranges = left + [(left_lo, left_hi + 1)]
+ return
+
+ # Otherwise, does this new address glue two ranges together?
+ assert first_idx_above == last_idx_below + 1
+ right_lo, right_hi = self.known_ranges[first_idx_above]
+ assert addr < right_lo
+
+ if addr == right_lo - 1:
+ self.known_ranges = (left + [(left_lo, right_hi)] +
+ self.known_ranges[first_idx_above + 1:])
+ return
+
+ # Otherwise, we still extend the range by one (but have to put the
+ # right hand list back too).
+ self.known_ranges = (left + [(left_lo, left_hi + 1)] +
+ self.known_ranges[first_idx_above:])
+ return
+
+ # We are miles above the left range. If there is no range above, we can
+ # just append a new 1-element range.
+ left_inc = self.known_ranges[:first_idx_above]
+ if first_idx_above is None:
+ self.known_ranges.append((addr, addr + 1))
+ return
+
+ # Otherwise, are we just below the next range?
+ assert first_idx_above == last_idx_below + 1
+ right_lo, right_hi = self.known_ranges[first_idx_above]
+ assert addr < right_lo
+
+ if addr == right_lo - 1:
+ self.known_ranges = (left_inc + [(right_lo - 1, right_hi)] +
+ self.known_ranges[first_idx_above + 1:])
+ return
+
+ # If not, we just insert a 1-element range in between
+ self.known_ranges = (left_inc + [(addr, addr + 1)] +
+ self.known_ranges[first_idx_above:])
+ return
+
+ def pick_lsu_target(self,
+ loads_value: bool,
+ base_addr: int,
+ offset_range: Tuple[int, int],
+ offset_align: int,
+ width: int,
+ addr_align: int) -> Optional[int]:
+ '''Try to pick an address with base and offset.
+
+ If loads_value is true, the memory needs a known value for at least
+ width bytes starting at that address. The address should be encodable
+ as base_addr + offset where offset is in offset_range (inclusive) and
+ is a multiple of offset_align. The address must be a multiple of
+ addr_align.
+
+ On failure, returns None. On success, returns the chosen address.
+
+ '''
+ assert offset_range[0] <= offset_range[1]
+ assert 1 <= offset_align
+ assert 1 <= width
+ assert 1 <= addr_align
+
+ # We're trying to pick an offset and an address so that
+ #
+ # base_addr + offset = addr
+ #
+ # Let's ignore offset_range and questions about valid memory addresses
+ # for a second. We have two alignment requirements from offset and
+ # addr, which mean we're really trying to satisfy something that looks
+ # like
+ #
+ # a = b i + c j
+ #
+ # for a = base_addr; b = -offset_align; c = addr_align: find solutions
+ # i, j.
+ #
+ # This is a 2-variable linear Diophantine equation. If gcd(b, c) does
+ # not divide a, there is no solution. Otherwise, the extended Euclidean
+ # algorithm yields x0, y0 such that
+ #
+ # gcd(b, c) = b x0 + c y0.
+ #
+ # Multiplying up by a / gcd(b, c) gives
+ #
+ # a = b i0 + c j0
+ #
+ # where i0 = x0 * a / gcd(b, c) and j0 = y0 * a / gcd(b, c).
+ #
+ # This is the "inhomogeneous part". It's a solution to the equation,
+ # and every other solution, (i, j) is a translate of the form
+ #
+ # i = i0 + k v
+ # j = j0 - k u
+ #
+ # for some k, where u = b / gcd(b, c) and v = c / gcd(b, c).
+ gcd, x0, y0 = _extended_euclidean_algorithm(-offset_align, addr_align)
+ assert gcd == -offset_align * x0 + addr_align * y0
+ assert 0 < gcd
+
+ if base_addr % gcd:
+ return None
+
+ # If gcd divides base_addr, we convert x0 and y0 to an initial solution
+ # (i0, j0) as described above by multiplying up by base_addr / gcd.
+ scale_factor = base_addr // gcd
+ i0 = x0 * scale_factor
+ j0 = y0 * scale_factor
+ minus_u = offset_align // gcd
+ v = addr_align // gcd
+ assert 0 < v
+ assert 0 < minus_u
+
+ # offset_range gives the possible values of offset, which is - b i
+ # in the equations above. Re-arranging the equation for i gives:
+ #
+ # k v = i - i0
+ #
+ # so
+ #
+ # b k v = b i - b i0 = - offset - b i0
+ #
+ # or
+ #
+ # k = (- offset - b i0) / (b v)
+ #
+ # Since b < 0 and v > 0, the denominator is negative and this is an
+ # increasing function of offset, so we can get the allowed range for k
+ # by evaluating it at the endpoints of offset_range.
+ bv = - offset_align * v
+ k_max = (-offset_range[1] + offset_align * i0) // bv
+ k_min = (-offset_range[0] + offset_align * i0 + (bv - 1)) // bv
+
+ # If k_min > k_max, this means b*v gives such big steps that none
+ # landed in the range of allowed offsets
+ if k_max < k_min:
+ return None
+
+ # Now, we need to consider which memory locations we can actually use.
+ # If we're writing memory, we have a single range of allowed addresses
+ # (all of memory!). If reading, we need to use self.known_ranges. In
+ # either case, adjust for the fact that we need a width-byte access and
+ # then rescale everything into "k units".
+ #
+ # To do that rescaling, we know that c j = addr and that j = j0 - k u.
+ # So
+ #
+ # j0 - k u = addr / c
+ # k u = j0 - addr / c
+ # k = (j0 - addr / c) / u
+ # = (addr / c - j0) / (- u)
+ #
+ # Since u is negative, this is an increasing function of addr, so we
+ # can use address endpoints to get (disjoint) ranges for k.
+ k_ranges = []
+ k_weights = []
+ byte_ranges = (self.known_ranges
+ if loads_value else [(0, self.top_addr - 1)])
+
+ for byte_lo, byte_top in byte_ranges:
+ # Since we're doing an access of width bytes, we round byte_top
+ # down to the largest base address where the access lies completely
+ # in the range.
+ base_hi = byte_top - width
+ if base_hi < byte_lo:
+ continue
+
+ # Compute the valid range for addr/c, rounding inwards.
+ word_lo = (byte_lo + addr_align - 1) // addr_align
+ word_hi = base_hi // addr_align
+
+ # If word_hi < word_lo, there are no multiples of addr_align in the
+ # range [byte_lo, base_hi].
+ if word_hi < word_lo:
+ continue
+
+ # Now translate by -j0 and divide through by -u, rounding inwards.
+ k_hi = (word_hi - j0) // minus_u
+ k_lo = (word_lo - j0 + (minus_u - 1)) // minus_u
+
+ # If k_hi < k_lo, that means there are no multiples of u in the
+ # range [word_lo - j0, word_hi - j0].
+ if k_hi < k_lo:
+ continue
+
+ # Finally, take the intersection with [k_min, k_max]. The
+ # intersection is non-empty so long as k_lo <= k_max and k_min <=
+ # k_hi.
+ if k_lo > k_max or k_min > k_hi:
+ continue
+
+ k_lo = max(k_lo, k_min)
+ k_hi = min(k_hi, k_max)
+
+ k_ranges.append((k_lo, k_hi))
+ k_weights.append(k_hi - k_lo + 1)
+
+ if not k_ranges:
+ return None
+
+ # We can finally pick a value of k. Pick the range (weighted by
+ # k_weights) and then pick uniformly from in that range.
+ k_lo, k_hi = random.choices(k_ranges, weights=k_weights)[0]
+ k = random.randrange(k_lo, k_hi + 1)
+
+ # Convert back to a solution to the original problem
+ i = i0 + k * v
+ j = j0 + k * minus_u
+
+ offset = offset_align * i
+ addr = addr_align * j
+
+ assert addr == base_addr + offset
+ return addr
diff --git a/hw/ip/otbn/util/rig/model.py b/hw/ip/otbn/util/rig/model.py
index 9e61542..ffbc634 100644
--- a/hw/ip/otbn/util/rig/model.py
+++ b/hw/ip/otbn/util/rig/model.py
@@ -9,345 +9,10 @@
from shared.operand import (OperandType,
ImmOperandType, OptionOperandType, RegOperandType)
+from .known_mem import KnownMem
from .program import ProgInsn
-def extended_euclidean_algorithm(a: int, b: int) -> Tuple[int, int, int]:
- '''The extended Euclidean algorithm.
-
- Returns a tuple (r, s, t) so that gcd is the GCD of the two inputs and r =
- a s + b t.
-
- '''
- r, r_nxt = a, b
- s, s_nxt = 1, 0
- t, t_nxt = 0, 1
-
- while r_nxt:
- q = r // r_nxt
- r, r_nxt = r_nxt, r - q * r_nxt
- s, s_nxt = s_nxt, s - q * s_nxt
- t, t_nxt = t_nxt, t - q * t_nxt
-
- # If both inputs are non-positive, the result comes out negative and we
- # should flip all the signs.
- if r < 0:
- r, s, t = - r, - s, - t
-
- return (r, s, t)
-
-
-def intersect_ranges(a: List[Tuple[int, int]],
- b: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
- ret = []
- paired = ([(r, False) for r in a] + [(r, True) for r in b])
- arng = None # type: Optional[Tuple[int, int]]
- brng = None # type: Optional[Tuple[int, int]]
- for (lo, hi), is_b in sorted(paired):
- if is_b:
- if arng is not None:
- a0, a1 = arng
- if a0 <= hi and lo <= a1:
- ret.append((max(a0, lo), min(a1, hi)))
- brng = (lo, hi)
- else:
- if brng is not None:
- b0, b1 = brng
- if b0 <= hi and lo <= b1:
- ret.append((max(lo, b0), min(hi, b1)))
- arng = (lo, hi)
- return ret
-
-
-class KnownMem:
- '''A representation of what memory/CSRs have architectural values'''
- def __init__(self, top_addr: int):
- assert top_addr > 0
-
- self.top_addr = top_addr
- # A list of pairs of addresses. If the pair (lo, hi) is in the list
- # then each byte in the address range {lo..hi - 1} has a known value.
- self.known_ranges = [] # type: List[Tuple[int, int]]
-
- def copy(self) -> 'KnownMem':
- '''Return a shallow copy of the object'''
- ret = KnownMem(self.top_addr)
- ret.known_ranges = self.known_ranges.copy()
- return ret
-
- def merge(self, other: 'KnownMem') -> None:
- '''Merge in values from another KnownMem object'''
- assert self.top_addr == other.top_addr
- self.known_ranges = intersect_ranges(self.known_ranges,
- other.known_ranges)
-
- def touch_range(self, base: int, width: int) -> None:
- '''Mark {base .. base + width - 1} as known'''
- assert 0 <= width
- assert 0 <= base <= self.top_addr - width
- for off in range(width):
- self.touch_addr(base + off)
-
- def touch_addr(self, addr: int) -> None:
- '''Mark word starting at addr as known'''
- assert 0 <= addr < self.top_addr
-
- # Find the index of the last range that starts below us, if there is
- # one, and the index of the first range that starts above us, if there
- # is one.
- last_idx_below = None
- first_idx_above = None
- for idx, (lo, hi) in enumerate(self.known_ranges):
- if lo <= addr:
- last_idx_below = idx
- continue
-
- first_idx_above = idx
- break
-
- # Are we below all other ranges?
- if last_idx_below is None:
- # Are we one address below the next range above? In which case, we
- # need to shuffle it back one.
- if first_idx_above is not None:
- lo, hi = self.known_ranges[first_idx_above]
- assert addr < lo
- if addr == lo - 1:
- self.known_ranges[first_idx_above] = (lo - 1, hi)
- return
-
- # Otherwise, we're disjoint. Add a one-element range at the start.
- self.known_ranges = [(addr, addr + 1)] + self.known_ranges
- return
-
- # If not, are we inside a range? In that case, there's nothing to do.
- left_lo, left_hi = self.known_ranges[last_idx_below]
- if addr < left_hi:
- return
-
- left = self.known_ranges[:last_idx_below]
-
- # Are we just above it?
- if addr == left_hi:
- # If there is no range above, we can just extend the last range by one.
- if first_idx_above is None:
- self.known_ranges = left + [(left_lo, left_hi + 1)]
- return
-
- # Otherwise, does this new address glue two ranges together?
- assert first_idx_above == last_idx_below + 1
- right_lo, right_hi = self.known_ranges[first_idx_above]
- assert addr < right_lo
-
- if addr == right_lo - 1:
- self.known_ranges = (left + [(left_lo, right_hi)] +
- self.known_ranges[first_idx_above + 1:])
- return
-
- # Otherwise, we still extend the range by one (but have to put the
- # right hand list back too).
- self.known_ranges = (left + [(left_lo, left_hi + 1)] +
- self.known_ranges[first_idx_above:])
- return
-
- # We are miles above the left range. If there is no range above, we can
- # just append a new 1-element range.
- left_inc = self.known_ranges[:first_idx_above]
- if first_idx_above is None:
- self.known_ranges.append((addr, addr + 1))
- return
-
- # Otherwise, are we just below the next range?
- assert first_idx_above == last_idx_below + 1
- right_lo, right_hi = self.known_ranges[first_idx_above]
- assert addr < right_lo
-
- if addr == right_lo - 1:
- self.known_ranges = (left_inc + [(right_lo - 1, right_hi)] +
- self.known_ranges[first_idx_above + 1:])
- return
-
- # If not, we just insert a 1-element range in between
- self.known_ranges = (left_inc + [(addr, addr + 1)] +
- self.known_ranges[first_idx_above:])
- return
-
- def pick_lsu_target(self,
- loads_value: bool,
- base_addr: int,
- offset_range: Tuple[int, int],
- offset_align: int,
- width: int,
- addr_align: int) -> Optional[int]:
- '''Try to pick an address with base and offset.
-
- If loads_value is true, the memory needs a known value for at least
- width bytes starting at that address. The address should be encodable
- as base_addr + offset where offset is in offset_range (inclusive) and
- is a multiple of offset_align. The address must be a multiple of
- addr_align.
-
- On failure, returns None. On success, returns the chosen address.
-
- '''
- assert offset_range[0] <= offset_range[1]
- assert 1 <= offset_align
- assert 1 <= width
- assert 1 <= addr_align
-
- # We're trying to pick an offset and an address so that
- #
- # base_addr + offset = addr
- #
- # Let's ignore offset_range and questions about valid memory addresses
- # for a second. We have two alignment requirements from offset and
- # addr, which mean we're really trying to satisfy something that looks
- # like
- #
- # a = b i + c j
- #
- # for a = base_addr; b = -offset_align; c = addr_align: find solutions
- # i, j.
- #
- # This is a 2-variable linear Diophantine equation. If gcd(b, c) does
- # not divide a, there is no solution. Otherwise, the extended Euclidean
- # algorithm yields x0, y0 such that
- #
- # gcd(b, c) = b x0 + c y0.
- #
- # Multiplying up by a / gcd(b, c) gives
- #
- # a = b i0 + c j0
- #
- # where i0 = x0 * a / gcd(b, c) and j0 = y0 * a / gcd(b, c).
- #
- # This is the "inhomogeneous part". It's a solution to the equation,
- # and every other solution, (i, j) is a translate of the form
- #
- # i = i0 + k v
- # j = j0 - k u
- #
- # for some k, where u = b / gcd(b, c) and v = c / gcd(b, c).
- gcd, x0, y0 = extended_euclidean_algorithm(-offset_align, addr_align)
- assert gcd == -offset_align * x0 + addr_align * y0
- assert 0 < gcd
-
- if base_addr % gcd:
- return None
-
- # If gcd divides base_addr, we convert x0 and y0 to an initial solution
- # (i0, j0) as described above by multiplying up by base_addr / gcd.
- scale_factor = base_addr // gcd
- i0 = x0 * scale_factor
- j0 = y0 * scale_factor
- minus_u = offset_align // gcd
- v = addr_align // gcd
- assert 0 < v
- assert 0 < minus_u
-
- # offset_range gives the possible values of offset, which is - b i
- # in the equations above. Re-arranging the equation for i gives:
- #
- # k v = i - i0
- #
- # so
- #
- # b k v = b i - b i0 = - offset - b i0
- #
- # or
- #
- # k = (- offset - b i0) / (b v)
- #
- # Since b < 0 and v > 0, the denominator is negative and this is an
- # increasing function of offset, so we can get the allowed range for k
- # by evaluating it at the endpoints of offset_range.
- bv = - offset_align * v
- k_max = (-offset_range[1] + offset_align * i0) // bv
- k_min = (-offset_range[0] + offset_align * i0 + (bv - 1)) // bv
-
- # If k_min > k_max, this means b*v gives such big steps that none
- # landed in the range of allowed offsets
- if k_max < k_min:
- return None
-
- # Now, we need to consider which memory locations we can actually use.
- # If we're writing memory, we have a single range of allowed addresses
- # (all of memory!). If reading, we need to use self.known_ranges. In
- # either case, adjust for the fact that we need a width-byte access and
- # then rescale everything into "k units".
- #
- # To do that rescaling, we know that c j = addr and that j = j0 - k u.
- # So
- #
- # j0 - k u = addr / c
- # k u = j0 - addr / c
- # k = (j0 - addr / c) / u
- # = (addr / c - j0) / (- u)
- #
- # Since u is negative, this is an increasing function of addr, so we
- # can use address endpoints to get (disjoint) ranges for k.
- k_ranges = []
- k_weights = []
- byte_ranges = (self.known_ranges
- if loads_value else [(0, self.top_addr - 1)])
-
- for byte_lo, byte_top in byte_ranges:
- # Since we're doing an access of width bytes, we round byte_top
- # down to the largest base address where the access lies completely
- # in the range.
- base_hi = byte_top - width
- if base_hi < byte_lo:
- continue
-
- # Compute the valid range for addr/c, rounding inwards.
- word_lo = (byte_lo + addr_align - 1) // addr_align
- word_hi = base_hi // addr_align
-
- # If word_hi < word_lo, there are no multiples of addr_align in the
- # range [byte_lo, base_hi].
- if word_hi < word_lo:
- continue
-
- # Now translate by -j0 and divide through by -u, rounding inwards.
- k_hi = (word_hi - j0) // minus_u
- k_lo = (word_lo - j0 + (minus_u - 1)) // minus_u
-
- # If k_hi < k_lo, that means there are no multiples of u in the
- # range [word_lo - j0, word_hi - j0].
- if k_hi < k_lo:
- continue
-
- # Finally, take the intersection with [k_min, k_max]. The
- # intersection is non-empty so long as k_lo <= k_max and k_min <=
- # k_hi.
- if k_lo > k_max or k_min > k_hi:
- continue
-
- k_lo = max(k_lo, k_min)
- k_hi = min(k_hi, k_max)
-
- k_ranges.append((k_lo, k_hi))
- k_weights.append(k_hi - k_lo + 1)
-
- if not k_ranges:
- return None
-
- # We can finally pick a value of k. Pick the range (weighted by
- # k_weights) and then pick uniformly from in that range.
- k_lo, k_hi = random.choices(k_ranges, weights=k_weights)[0]
- k = random.randrange(k_lo, k_hi + 1)
-
- # Convert back to a solution to the original problem
- i = i0 + k * v
- j = j0 + k * minus_u
-
- offset = offset_align * i
- addr = addr_align * j
-
- assert addr == base_addr + offset
- return addr
-
-
class CallStack:
'''An abstract model of the x1 call stack'''
def __init__(self) -> None: