blob: a711983c99572981d50ba417eefc2e31ae307c6d [file]
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
import random
from typing import List, Optional, Tuple
def _extended_euclidean_algorithm(a: int, b: int) -> Tuple[int, int, int]:
'''The extended Euclidean algorithm.
Returns a tuple (r, s, t) so that gcd is the GCD of the two inputs and r =
a s + b t.
'''
r, r_nxt = a, b
s, s_nxt = 1, 0
t, t_nxt = 0, 1
while r_nxt:
q = r // r_nxt
r, r_nxt = r_nxt, r - q * r_nxt
s, s_nxt = s_nxt, s - q * s_nxt
t, t_nxt = t_nxt, t - q * t_nxt
# If both inputs are non-positive, the result comes out negative and we
# should flip all the signs.
if r < 0:
r, s, t = - r, - s, - t
return (r, s, t)
def _intersect_ranges(a: List[Tuple[int, int]],
b: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
ret = []
paired = ([(r, False) for r in a] + [(r, True) for r in b])
arng = None # type: Optional[Tuple[int, int]]
brng = None # type: Optional[Tuple[int, int]]
for (lo, hi), is_b in sorted(paired):
if is_b:
if arng is not None:
a0, a1 = arng
if a0 <= hi and lo <= a1:
ret.append((max(a0, lo), min(a1, hi)))
brng = (lo, hi)
else:
if brng is not None:
b0, b1 = brng
if b0 <= hi and lo <= b1:
ret.append((max(lo, b0), min(hi, b1)))
arng = (lo, hi)
return ret
class KnownMem:
'''A representation of what memory/CSRs have architectural values'''
def __init__(self, top_addr: int):
assert top_addr > 0
self.top_addr = top_addr
# A list of pairs of addresses. If the pair (lo, hi) is in the list
# then each byte in the address range {lo..hi - 1} has a known value.
self.known_ranges = [] # type: List[Tuple[int, int]]
def copy(self) -> 'KnownMem':
'''Return a shallow copy of the object'''
ret = KnownMem(self.top_addr)
ret.known_ranges = self.known_ranges.copy()
return ret
def merge(self, other: 'KnownMem') -> None:
'''Merge in values from another KnownMem object'''
assert self.top_addr == other.top_addr
self.known_ranges = _intersect_ranges(self.known_ranges,
other.known_ranges)
def touch_range(self, base: int, width: int) -> None:
'''Mark {base .. base + width - 1} as known'''
assert 0 <= width
assert 0 <= base <= self.top_addr - width
for off in range(width):
self.touch_addr(base + off)
def touch_addr(self, addr: int) -> None:
'''Mark word starting at addr as known'''
assert 0 <= addr < self.top_addr
# Find the index of the last range that starts below us, if there is
# one, and the index of the first range that starts above us, if there
# is one.
last_idx_below = None
first_idx_above = None
for idx, (lo, hi) in enumerate(self.known_ranges):
if lo <= addr:
last_idx_below = idx
continue
first_idx_above = idx
break
# Are we below all other ranges?
if last_idx_below is None:
# Are we one address below the next range above? In which case, we
# need to shuffle it back one.
if first_idx_above is not None:
lo, hi = self.known_ranges[first_idx_above]
assert addr < lo
if addr == lo - 1:
self.known_ranges[first_idx_above] = (lo - 1, hi)
return
# Otherwise, we're disjoint. Add a one-element range at the start.
self.known_ranges = [(addr, addr + 1)] + self.known_ranges
return
# If not, are we inside a range? In that case, there's nothing to do.
left_lo, left_hi = self.known_ranges[last_idx_below]
if addr < left_hi:
return
left = self.known_ranges[:last_idx_below]
# Are we just above it?
if addr == left_hi:
# If there is no range above, we can just extend the last range by one.
if first_idx_above is None:
self.known_ranges = left + [(left_lo, left_hi + 1)]
return
# Otherwise, does this new address glue two ranges together?
assert first_idx_above == last_idx_below + 1
right_lo, right_hi = self.known_ranges[first_idx_above]
assert addr < right_lo
if addr == right_lo - 1:
self.known_ranges = (left + [(left_lo, right_hi)] +
self.known_ranges[first_idx_above + 1:])
return
# Otherwise, we still extend the range by one (but have to put the
# right hand list back too).
self.known_ranges = (left + [(left_lo, left_hi + 1)] +
self.known_ranges[first_idx_above:])
return
# We are miles above the left range. If there is no range above, we can
# just append a new 1-element range.
left_inc = self.known_ranges[:first_idx_above]
if first_idx_above is None:
self.known_ranges.append((addr, addr + 1))
return
# Otherwise, are we just below the next range?
assert first_idx_above == last_idx_below + 1
right_lo, right_hi = self.known_ranges[first_idx_above]
assert addr < right_lo
if addr == right_lo - 1:
self.known_ranges = (left_inc + [(right_lo - 1, right_hi)] +
self.known_ranges[first_idx_above + 1:])
return
# If not, we just insert a 1-element range in between
self.known_ranges = (left_inc + [(addr, addr + 1)] +
self.known_ranges[first_idx_above:])
return
def pick_lsu_target(self,
loads_value: bool,
base_addr: int,
offset_range: Tuple[int, int],
offset_align: int,
width: int,
addr_align: int) -> Optional[int]:
'''Try to pick an address with base and offset.
If loads_value is true, the memory needs a known value for at least
width bytes starting at that address. The address should be encodable
as base_addr + offset where offset is in offset_range (inclusive) and
is a multiple of offset_align. The address must be a multiple of
addr_align.
On failure, returns None. On success, returns the chosen address.
'''
assert offset_range[0] <= offset_range[1]
assert 1 <= offset_align
assert 1 <= width
assert 1 <= addr_align
# We're trying to pick an offset and an address so that
#
# base_addr + offset = addr
#
# Let's ignore offset_range and questions about valid memory addresses
# for a second. We have two alignment requirements from offset and
# addr, which mean we're really trying to satisfy something that looks
# like
#
# a = b i + c j
#
# for a = base_addr; b = -offset_align; c = addr_align: find solutions
# i, j.
#
# This is a 2-variable linear Diophantine equation. If gcd(b, c) does
# not divide a, there is no solution. Otherwise, the extended Euclidean
# algorithm yields x0, y0 such that
#
# gcd(b, c) = b x0 + c y0.
#
# Multiplying up by a / gcd(b, c) gives
#
# a = b i0 + c j0
#
# where i0 = x0 * a / gcd(b, c) and j0 = y0 * a / gcd(b, c).
#
# This is the "inhomogeneous part". It's a solution to the equation,
# and every other solution, (i, j) is a translate of the form
#
# i = i0 + k v
# j = j0 - k u
#
# for some k, where u = b / gcd(b, c) and v = c / gcd(b, c).
gcd, x0, y0 = _extended_euclidean_algorithm(-offset_align, addr_align)
assert gcd == -offset_align * x0 + addr_align * y0
assert 0 < gcd
if base_addr % gcd:
return None
# If gcd divides base_addr, we convert x0 and y0 to an initial solution
# (i0, j0) as described above by multiplying up by base_addr / gcd.
scale_factor = base_addr // gcd
i0 = x0 * scale_factor
j0 = y0 * scale_factor
minus_u = offset_align // gcd
v = addr_align // gcd
assert 0 < v
assert 0 < minus_u
# offset_range gives the possible values of offset, which is - b i
# in the equations above. Re-arranging the equation for i gives:
#
# k v = i - i0
#
# so
#
# b k v = b i - b i0 = - offset - b i0
#
# or
#
# k = (- offset - b i0) / (b v)
#
# Since b < 0 and v > 0, the denominator is negative and this is an
# increasing function of offset, so we can get the allowed range for k
# by evaluating it at the endpoints of offset_range.
bv = - offset_align * v
k_max = (-offset_range[1] + offset_align * i0) // bv
k_min = (-offset_range[0] + offset_align * i0 + (bv - 1)) // bv
# If k_min > k_max, this means b*v gives such big steps that none
# landed in the range of allowed offsets
if k_max < k_min:
return None
# Now, we need to consider which memory locations we can actually use.
# If we're writing memory, we have a single range of allowed addresses
# (all of memory!). If reading, we need to use self.known_ranges. In
# either case, adjust for the fact that we need a width-byte access and
# then rescale everything into "k units".
#
# To do that rescaling, we know that c j = addr and that j = j0 - k u.
# So
#
# j0 - k u = addr / c
# k u = j0 - addr / c
# k = (j0 - addr / c) / u
# = (addr / c - j0) / (- u)
#
# Since u is negative, this is an increasing function of addr, so we
# can use address endpoints to get (disjoint) ranges for k.
k_ranges = []
k_weights = []
byte_ranges = (self.known_ranges
if loads_value else [(0, self.top_addr - 1)])
for byte_lo, byte_top in byte_ranges:
# Since we're doing an access of width bytes, we round byte_top
# down to the largest base address where the access lies completely
# in the range.
base_hi = byte_top - width
if base_hi < byte_lo:
continue
# Compute the valid range for addr/c, rounding inwards.
word_lo = (byte_lo + addr_align - 1) // addr_align
word_hi = base_hi // addr_align
# If word_hi < word_lo, there are no multiples of addr_align in the
# range [byte_lo, base_hi].
if word_hi < word_lo:
continue
# Now translate by -j0 and divide through by -u, rounding inwards.
k_hi = (word_hi - j0) // minus_u
k_lo = (word_lo - j0 + (minus_u - 1)) // minus_u
# If k_hi < k_lo, that means there are no multiples of u in the
# range [word_lo - j0, word_hi - j0].
if k_hi < k_lo:
continue
# Finally, take the intersection with [k_min, k_max]. The
# intersection is non-empty so long as k_lo <= k_max and k_min <=
# k_hi.
if k_lo > k_max or k_min > k_hi:
continue
k_lo = max(k_lo, k_min)
k_hi = min(k_hi, k_max)
k_ranges.append((k_lo, k_hi))
k_weights.append(k_hi - k_lo + 1)
if not k_ranges:
return None
# We can finally pick a value of k. Pick the range (weighted by
# k_weights) and then pick uniformly from in that range.
k_lo, k_hi = random.choices(k_ranges, weights=k_weights)[0]
k = random.randrange(k_lo, k_hi + 1)
# Convert back to a solution to the original problem
i = i0 + k * v
j = j0 + k * minus_u
offset = offset_align * i
addr = addr_align * j
assert addr == base_addr + offset
return addr