| # Copyright lowRISC contributors. |
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| # SPDX-License-Identifier: Apache-2.0 |
| |
| import random |
| from typing import List, Optional, Tuple |
| |
| from shared.insn_yaml import Insn, InsnsFile |
| from shared.lsu_desc import LSUDesc |
| from shared.operand import ImmOperandType, RegOperandType |
| |
| from .program import ProgInsn, Program |
| from .model import Model |
| from .snippet import Snippet |
| |
| |
| class SnippetGen: |
| '''A generator for a parameterised sequence of instructions
|
| The snippets that it generates can be added to the instructions generated
| so far for a given random binary.
| |
| ''' |
| def gen(self, |
| size: int, |
| model: Model, |
| program: Program) -> Optional[Tuple[Snippet, bool, int]]: |
| '''Try to generate instructions for this type of snippet. |
| |
| size is always positive and gives an upper bound on the number of |
| instructions in the dynamic instruction stream that this should |
| generate. For example, a loop of 10 instructions that goes around 10 |
| times would consume 100 from size. |
| |
| On success, inserts the instructions into program, updates the model, |
| and returns a tuple (snippet, done, new_size). snippet is the generated |
| snippet. done is true if the program is finished (that is, the snippet
| ends with an ECALL) and false otherwise. new_size is the size remaining
| after the generated snippet.
| |
| On failure, leaves program and model unchanged and returns None. There |
| should always be at least one snippet generator with positive weight |
| (see pick_weight below) that succeeds unconditionally. This will be the |
| ecall generator. Failure is interpreted as "this snippet won't work |
| with the current program state", but the generator may be retried |
| later. |
| |
| ''' |
| raise NotImplementedError('gen not implemented by subclass') |
| |
| def pick_weight(self, |
| size: int, |
| model: Model, |
| program: Program) -> float: |
| '''Pick a weight by which to multiply this generator's default weight |
| |
| This is called for each generator before we start trying to generate a |
| snippet for a given program and model state. This can be used to |
| disable a generator when we know it won't work (if size is too small, for |
| example). |
| |
| It can also be used to alter weights depending on where we are in the |
| program. For example, a generator that generates ecall to end the |
| program could decrease its weight when size is large, to avoid |
| generating tiny programs by accident. |
| |
| The default implementation always returns 1.0. |
| |
| ''' |
| return 1.0 |
| |
| |
| class ECall(SnippetGen): |
| '''A generator that makes a snippet with a single ECALL instruction''' |
| def __init__(self, insns_file: InsnsFile) -> None: |
| ecall_insn = insns_file.mnemonic_to_insn.get('ecall') |
| if ecall_insn is None: |
| raise RuntimeError('No ECALL instruction in instructions file') |
| |
| if ecall_insn.operands: |
| raise RuntimeError('ECALL instruction in instructions file ' |
| 'has a nonempty list of operands.') |
| |
| if ecall_insn.lsu: |
| raise RuntimeError('ECALL instruction in instructions file ' |
| 'has unexpected LSU information.') |
| |
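| # ECALL takes no operands and has no LSU target, so the ProgInsn gets an
| # empty operand list and no memory-access information.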
| self.insn = ProgInsn(ecall_insn, [], None) |
| |
| def gen(self, |
| size: int, |
| model: Model, |
| program: Program) -> Optional[Tuple[Snippet, bool, int]]: |
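| # Generate a single ECALL instruction at the current PC. This always
| # succeeds and finishes the program, so done is True and the remaining
| # size is zero.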
| snippet = Snippet([(model.pc, [self.insn])]) |
| snippet.insert_into_program(program) |
| return (snippet, True, 0) |
| |
| def pick_weight(self, |
| size: int, |
| model: Model, |
| program: Program) -> float: |
| # Choose small weights when size is large and large ones when it's |
| # small. |
| assert size > 0 |
| return (1e-10 if size > 5 |
| else 0.1 if size > 1 |
| else 1e10) |
| |
| |
| class StraightLineInsn(SnippetGen): |
| '''A generator for a super-simple snippet consisting of a single instruction'''
| def __init__(self, insns_file: InsnsFile) -> None: |
| # Find all the straight line, non-pseudo instructions in insns_file |
| self.insns = [] |
| for insn in insns_file.insns: |
| # Skip pseudo-ops |
| if insn.python_pseudo_op or insn.literal_pseudo_op: |
| continue |
| |
| # Skip anything that isn't straight-line |
| if not insn.straight_line: |
| continue |
| |
| # Skip bn.sid, bn.lid and bn.movr: These are indirect and we don't |
| # currently track their sources properly (e.g. "bn.movr x2, x3" |
| # reads from the WDR whose index is whatever is currently in x3) |
| if insn.mnemonic in ['bn.sid', 'bn.lid', 'bn.movr']: |
| continue |
| |
| self.insns.append(insn) |
| |
| def gen(self, |
| size: int, |
| model: Model, |
| program: Program) -> Optional[Tuple[Snippet, bool, int]]: |
| |
| # Pick a (YAML) instruction at random. We'll probably do some clever
| # weighting here later on but, for now, we start with uniform weights
| # (failures below zero out individual entries).
| weights = [1.0] * len(self.insns) |
| |
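| # Keep picking until fill_insn manages to choose operand values for an
| # instruction. Instructions that can't be filled out with the current
| # model state get their weight zeroed so we don't pick them again.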
| prog_insn = None |
| while prog_insn is None: |
| idx = random.choices(range(len(self.insns)), weights=weights)[0] |
| # Sanity check: the entry we picked must have had positive weight
| # (random.choices can silently return a zero-weight entry if all the
| # weights are zero).
| assert weights[idx] > 0 |
| |
| # Try to fill out the instruction. On failure, clear the weight for |
| # this index and go around again. |
| prog_insn = self.fill_insn(self.insns[idx], model) |
| if prog_insn is None: |
| weights[idx] = 0 |
| continue |
| |
| # Success! We have generated an instruction. Put it in a snippet and |
| # add that to the program |
| snippet = Snippet([(model.pc, [prog_insn])]) |
| snippet.insert_into_program(program) |
| |
| # Then update the model with the instruction and advance the model's PC
| model.update_for_insn(prog_insn) |
| model.pc += 4 |
| |
| return (snippet, False, size - 1) |
| |
| def fill_insn(self, insn: Insn, model: Model) -> Optional[ProgInsn]: |
| '''Try to fill out an instruction |
| |
| This might fail if, for example, the model doesn't have enough |
| registers with architectural values. In that case, return None. |
| |
| ''' |
| |
| # If this is not an LSU operation, we can pick each operand
| # independently.
| if insn.lsu is None: |
| # For each operand, pick a value that's allowed by the model (i.e. |
| # one that won't trigger any undefined behaviour) |
| op_vals = [] |
| for operand in insn.operands: |
| op_val = model.pick_operand_value(operand.op_type) |
| if op_val is None: |
| return None |
| |
| op_vals.append(op_val) |
| |
| assert len(op_vals) == len(insn.operands) |
| return ProgInsn(insn, op_vals, None) |
| |
| # If this is an LSU operation, then the target address is given by the |
| # sum of one or more operands. For each of these operands with a |
| # register type, we are going to need to look in the model to figure |
| # out the list of different known values we can give it. At the moment, |
| # we only support the case when there is at most one non-register |
| # operand, which must be an immediate. Grab that operand's name too. |
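| # If there turns out to be no immediate operand, the immediate
| # contribution to the address is fixed at zero, hence the [0, 0] default
| # range below.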
| lsu_imm_op = None |
| lsu_reg_ops = [] |
| lsu_reg_types = set() |
| imm_op_min = 0 |
| imm_op_max = 0 |
| |
| for tgt_op_name in insn.lsu.target: |
| tgt_op = insn.name_to_operand[tgt_op_name] |
| if isinstance(tgt_op.op_type, ImmOperandType): |
| if lsu_imm_op is not None: |
| raise RuntimeError('Multiple immediate operands ' |
| 'contribute to target for instruction ' |
| '{!r}. Not currently supported.' |
| .format(insn.mnemonic)) |
| lsu_imm_op = tgt_op_name |
| |
| imm_op_range = tgt_op.op_type.get_range() |
| if imm_op_range is None: |
| assert tgt_op.op_type.width is None |
| raise RuntimeError('The {!r} immediate operand for the ' |
| '{!r} instruction contributes to its ' |
| 'LSU target but has no width.' |
| .format(tgt_op_name, insn.mnemonic)) |
| |
| imm_op_min, imm_op_max = imm_op_range |
| continue |
| |
| if isinstance(tgt_op.op_type, RegOperandType): |
| reg_type = tgt_op.op_type.reg_type |
| lsu_reg_ops.append((tgt_op_name, reg_type)) |
| lsu_reg_types.add(reg_type) |
| continue |
| |
| raise RuntimeError('Unknown operand type for {!r} operand of ' |
| '{!r} instruction: {}.' |
| .format(tgt_op_name, insn.mnemonic, |
| type(tgt_op.op_type).__name__)) |
| |
| # We have a list of register operands, together with their types. Get a |
| # list of registers with known values for each register type we've seen. |
| known_regs_by_type = {rtype: model.regs_with_known_vals(rtype) |
| for rtype in lsu_reg_types} |
| |
| # And turn that into a dict keyed by operand name |
| op_to_known_regs = {op_name: known_regs_by_type[op_type] |
| for op_name, op_type in lsu_reg_ops} |
| |
| # Ask the model to try to find a target we can use. If this is a load |
| # or a CSR operation, it will have to be an address that already has an |
| # architectural value. If a store, it can be any address in range. |
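| # Each entry maps an LSU type to (memory type, loads_value), where
| # loads_value says whether the target must already hold an architectural
| # value.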
| lsu_type_to_info = { |
| 'mem-load': ('dmem', True), |
| 'mem-store': ('dmem', False), |
| 'csr': ('csr', True), |
| 'wsr': ('wsr', True) |
| } |
| assert set(lsu_type_to_info.keys()) == set(LSUDesc.TYPES) |
| mem_type, loads_value = lsu_type_to_info[insn.lsu.lsu_type] |
| |
| tgt = model.pick_lsu_target(mem_type, |
| loads_value, |
| op_to_known_regs, |
| imm_op_min, |
| imm_op_max, |
| insn.lsu.idx_width) |
| if tgt is None: |
| return None |
| |
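| # On success, the model gives us the target address, the immediate value
| # to encode and a map from each register operand name to the register
| # index whose known value contributes to that address.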
| addr, imm_val, reg_indices = tgt |
| assert imm_op_min <= imm_val <= imm_op_max |
| |
| op_vals = [] |
| for operand in insn.operands: |
| # Is this the immediate? If the immediate operand is signed then
| # imm_op_min will be negative and imm_val might be negative too.
| # However, op_vals stores everything as unsigned encoded values, so we
| # use encode_val to convert the value to its 2's-complement encoding.
| if operand.name == lsu_imm_op: |
| assert isinstance(operand.op_type, ImmOperandType) |
| op_vals.append(operand.op_type.encode_val(imm_val)) |
| continue |
| |
| # Or is it a register operand contributing to the target address? |
| reg_val = reg_indices.get(operand.name) |
| if reg_val is not None: |
| op_vals.append(reg_val) |
| continue |
| |
| # Otherwise it's some other operand. Pick any old value. |
| val = model.pick_operand_value(operand.op_type) |
| if val is None: |
| return None |
| op_vals.append(val) |
| |
| assert len(op_vals) == len(insn.operands) |
| return ProgInsn(insn, op_vals, (mem_type, addr)) |
| |
| |
| class SnippetGens: |
| '''A collection of snippet generators''' |
| _WEIGHTED_CLASSES = [ |
| (ECall, 1.0), |
| (StraightLineInsn, 1.0) |
| ] |
| |
| def __init__(self, insns_file: InsnsFile) -> None: |
| self.generators = [] # type: List[Tuple[SnippetGen, float]] |
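| # Instantiate each generator class once, pairing the instance with its
| # default weight.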
| for cls, weight in SnippetGens._WEIGHTED_CLASSES: |
| self.generators.append((cls(insns_file), weight)) |
| |
| def gen(self, |
| size: int, |
| model: Model, |
| program: Program) -> Tuple[Snippet, bool, int]: |
| '''Pick a snippet and update model, program with its contents. |
| |
| Returns a tuple (snippet, done, new_size) with the same meanings as
| SnippetGen.gen, except that new_size is clamped to be at least 1 if done
| is false. This avoids snippet generators having to special-case to make
| sure they aren't chosen when size is near zero. The end result might be a
| slightly longer instruction stream than we intended, but it shouldn't be
| much bigger.
| |
| ''' |
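| # Compute an effective weight for each generator in the current state by
| # scaling its static weight by its pick_weight result.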
| real_weights = [] |
| for generator, weight in self.generators: |
| weight_mult = generator.pick_weight(size, model, program) |
| real_weights.append(weight * weight_mult) |
| |
| while True: |
| # Pick a generator based on the weights in real_weights. |
| idx = random.choices(range(len(self.generators)), |
| weights=real_weights)[0] |
| generator, _ = self.generators[idx] |
| |
| # Note that there should always be at least one non-zero weight in
| # real_weights. random.choices doesn't guard against this usefully:
| # depending on the Python version, passing all-zero weights either
| # raises ValueError or silently picks the last element. Since the
| # latter would cause an infinite loop, add a sanity check here to
| # make sure that the choice we made had positive weight.
| assert real_weights[idx] > 0 |
| |
| # Run the generator to generate a snippet |
| gen_res = generator.gen(size, model, program) |
| if gen_res is not None: |
| snippet, done, new_size = gen_res |
| if not done: |
| new_size = max(new_size, 1) |
| |
| return (snippet, done, new_size) |
| |
| # If gen_res is None, the generator failed. Set that weight to zero |
| # and try again. |
| real_weights[idx] = 0.0 |