blob: 860706eb777cdddcc0e0fd6ad19d9fc4639bb3c8 [file] [log] [blame]
Rupert Swarbrick63da48e2020-07-08 10:16:57 +01001#!/usr/bin/env python3
2# Copyright lowRISC contributors.
3# Licensed under the Apache License, Version 2.0, see LICENSE for details.
4# SPDX-License-Identifier: Apache-2.0
5
6'''Generate Markdown documentation for the instructions in insns.yml'''
7
8import argparse
Rupert Swarbrickd3de4bb2020-09-01 17:50:19 +01009import os
Rupert Swarbrick63da48e2020-07-08 10:16:57 +010010import sys
Rupert Swarbrick74d2e292021-01-08 15:20:09 +000011from typing import Dict, List, Optional, TextIO, Tuple
Rupert Swarbrick63da48e2020-07-08 10:16:57 +010012
Rupert Swarbrickb2040532020-08-06 15:08:55 +010013from shared.bool_literal import BoolLiteral
14from shared.encoding import Encoding
Rupert Swarbrickd3de4bb2020-09-01 17:50:19 +010015from shared.insn_yaml import Insn, InsnsFile, InsnGroup, load_file
Rupert Swarbrick2c116752021-01-19 10:16:33 +000016from shared.operand import EnumOperandType, OptionOperandType, Operand
Rupert Swarbrick74d2e292021-01-08 15:20:09 +000017
Rupert Swarbrick4077bec2020-10-05 11:50:11 +010018from docs.get_impl import read_implementation
19
# The pair of dicts (o2e, e2o) returned by name_op_enc_fields: o2e maps an
# operand name to the names of the encoding fields that contribute to it, and
# e2o maps the MSB of a bit range to the name shown for that range.
_O2EDicts = Tuple[Dict[str, List[str]], Dict[int, str]]
Rupert Swarbrick63da48e2020-07-08 10:16:57 +010021
22
def render_operand_row(operand: Operand,
                       op_ranges: Optional[List[str]]) -> str:
    '''Render one operand as a single <tr> row of the operand table

    The first cell holds the operand's name in backticks. The second cell
    holds operand.doc (if there is any), followed by the markdown
    documentation implied by the operand's type (if it has a type and that
    type supplies some). If op_ranges is not None and the operand has a type,
    a "Decode as ..." line is added, using the string that the type's
    describe_decode method builds from op_ranges.

    '''
    # The cells are written as raw HTML, but their contents are arbitrary
    # markdown. Putting a blank line above and below the contents makes (at
    # least) Github flavoured markdown switch back to "markdown mode" inside
    # the cell, so nothing needs escaping as &lt; and friends.
    name_cell = '<tr><td>\n\n`{}`\n\n</td><td>'.format(operand.name)

    # Description cell: documentation from the file first, then anything
    # implied by the operand's type.
    description = []
    if operand.doc is not None:
        description += ['\n\n', operand.doc]

    op_type = operand.op_type
    if op_type is not None:
        type_doc = op_type.markdown_doc()
        if type_doc is not None:
            description += ['\n\n', type_doc]

        if op_ranges is not None:
            decode = op_type.describe_decode(op_ranges)
            description.append('\n\nDecode as `{}`\n\n'.format(decode))

    return name_cell + ''.join(description) + '\n\n</td></tr>'
56
57
def render_operand_table(operands: List[Operand],
                         o2e: Optional[Dict[str, List[str]]]) -> str:
    '''Render the operand table for an instruction

    The result is an HTML table with one row per entry of operands, in order.
    If o2e is not None, it must have an entry for every operand's name; that
    entry is passed on to render_operand_row so that the row can describe
    how the operand is decoded.

    '''
    # Markdown tables only support inline elements, but the cells here hold
    # block-level elements, so the table is generated in <tr><td> form.
    header = ('<table><thead>'
              '<tr><th>Operand</th><th>Description</th></tr>'
              '</thead>'
              '<tbody>')

    rows = []
    for operand in operands:
        op_ranges = None
        if o2e is not None:
            op_ranges = o2e.get(operand.name)
            # An encoding should encode every operand, so name_op_enc_fields
            # should have produced an entry for this one.
            assert op_ranges is not None

        rows.append(render_operand_row(operand, op_ranges))

    return header + ''.join(rows) + '</tbody></table>\n\n'
83
84
def render_encoding(mnemonic: str,
                    encoding: Encoding,
                    e2o: Dict[int, str]) -> str:
    '''Render an instruction encoding as a two-row HTML table

    The first row lists the bit indices from 31 down to 0. The second row
    starts with the upper-cased mnemonic, followed by one cell per literal
    bit and one cell (spanning the range's width) per operand bit range.
    e2o maps the MSB of each operand bit range to the name shown in its cell.

    '''
    parts = ['<table style="font-size: 75%">', '<tr>', '<td></td>']
    parts.extend('<td>{}</td>'.format(bit) for bit in range(31, -1, -1))
    parts.append('</tr>')

    # The cells of the second row, keyed by the MSB of the bits they cover.
    # Each value is a pair (width, desc): the number of bits the cell covers
    # and the text to show in it.
    cells = {}

    for field in encoding.fields.values():
        scheme_bits = field.scheme_field.bits
        value = field.value

        if not isinstance(value, BoolLiteral):
            # The field's value is an operand name. name_op_enc_fields should
            # have added the MSB of each of its ranges to e2o.
            assert isinstance(value, str)
            for msb, lsb in scheme_bits.ranges:
                assert msb in e2o
                cells[msb] = (msb - lsb + 1, e2o[msb])
            continue

        # The field is a literal value: explode it into single-bit cells by
        # walking the field's ranges and matching each one up with the
        # corresponding range of bits in the value.
        assert value.width > 0
        assert value.width == scheme_bits.width
        bits_seen = 0
        for msb, lsb in scheme_bits.ranges:
            range_width = msb - lsb + 1
            val_msb = scheme_bits.width - 1 - bits_seen
            val_lsb = val_msb - msb + lsb

            for offset in range(range_width):
                bit_char = value.char_for_bit(val_lsb + offset)
                # Bits whose character is 'x' get an empty cell
                cells[lsb + offset] = (1, '' if bit_char == 'x' else bit_char)

            bits_seen += range_width

    parts.append('<tr>')
    parts.append('<td>{}</td>'.format(mnemonic.upper()))

    # Emit the cells from bit 31 downwards. The cells should tile the word
    # exactly, with no gaps or overlaps (encoding objects are expected to
    # cover every bit), which is what the assertions check.
    expected_msb = 31
    for msb in sorted(cells, reverse=True):
        assert msb == expected_msb

        width, desc = cells[msb]
        parts.append('<td colspan="{}">{}</td>'.format(width, desc))
        expected_msb = msb - width

    assert expected_msb == -1
    parts.append('</tr>')

    parts.append('</table>\n\n')
    return ''.join(parts)
147
148
def render_literal_pseudo_op(rewrite: List[str]) -> str:
    '''Describe a pseudo-operation by listing what it expands to

    rewrite holds the lines of the expansion. They are shown verbatim, one
    per line, in a fenced code block that follows an introductory sentence.

    '''
    intro = ('This instruction is a pseudo-operation and expands to the '
             'following instruction sequence:\n```\n')
    listing = ''.join(line + '\n' for line in rewrite)
    return intro + listing + '```\n\n'
159
160
def name_op_enc_fields(name_to_operand: Dict[str, Operand],
                       encoding: Encoding) -> _O2EDicts:
    '''Name the encoding fields that correspond to operands

    In the generated documentation, encoding fields are named after the
    operand that they encode. For example, if the operand "foo" is encoded in
    a field, the field is labelled "FOO" in the table. If the field is split
    over several bit ranges, the ranges are labelled "FOO_0", "FOO_1" and so
    on, counting from the LSB. If the operand has an abbreviated name, that
    is used (upper-cased) in place of the full operand name.

    Returns a pair of dicts: (o2e, e2o). o2e maps an operand name to the list
    of (our names for) the bit ranges that contribute to it, MSB first. e2o
    maps the MSB of a bit range to the name that should appear for that range
    in the documentation.

    In the example above, o2e['foo'] = ["FOO_1", "FOO_0"]. If the upper range
    of bits for the field encoding 'foo' has MSB 10, then e2o[10] = 'FOO_1'.

    '''
    o2e = {}  # type: Dict[str, List[str]]
    e2o = {}  # type: Dict[int, str]

    for field in encoding.fields.values():
        value = field.value

        # Literal values don't correspond to operands, so there is nothing
        # to name.
        if isinstance(value, BoolLiteral):
            continue

        # Anything else is the name of an operand, which an encoding should
        # never use more than once.
        assert isinstance(value, str)
        operand_name = value
        assert operand_name not in o2e

        # The base name for labels is the operand's abbreviated name if it
        # has one and its full name otherwise, upper-cased either way.
        operand = name_to_operand.get(operand_name)
        assert operand is not None
        if operand.abbrev is None:
            basename = operand_name.upper()
        else:
            basename = operand.abbrev.upper()

        # There should always be at least one bit range for the field
        bit_ranges = field.scheme_field.bits.ranges
        assert bit_ranges

        # Build the labels MSB-first, matching the order of bit_ranges. A
        # lone range is called just the base name. Several ranges get a
        # numeric suffix, where the range nearest the LSB (the last one) has
        # index zero.
        num_ranges = len(bit_ranges)
        if num_ranges == 1:
            labels = [basename]
        else:
            labels = ['{}_{}'.format(basename, num_ranges - 1 - pos)
                      for pos in range(num_ranges)]

        # Record the label for each range, walking LSB-first.
        for (msb, _), label in zip(reversed(bit_ranges), reversed(labels)):
            assert msb not in e2o
            e2o[msb] = label

        o2e[operand_name] = labels

    return (o2e, e2o)
233
234
def _render_errors(mnem: str, errs: List[str]) -> List[str]:
    '''Return the body of the Errors section for the instruction mnem'''
    if not errs:
        return ['{} cannot cause any software errors.\n'.format(mnem)]

    lines = ['{} might cause the following software errors:\n'.format(mnem)]
    lines.extend('- {}\n'.format(desc) for desc in errs)
    return lines


def _render_int_operands_note(operands: List[Operand]) -> str:
    '''Return a reminder about operands that a listing refers to as integers

    This names every operand whose type is an enum or option type. The result
    is the empty string if there are no such operands.

    '''
    names = [operand.name
             for operand in operands
             if isinstance(operand.op_type,
                           (EnumOperandType, OptionOperandType))]
    if not names:
        return ''

    if len(names) == 1:
        op_str = 'operand `{}` is referred to by its'.format(names[0])
    else:
        leading = ', '.join('`{}`'.format(name) for name in names[:-1])
        op_str = ('operands {} and `{}` are referred to by their'
                  .format(leading, names[-1]))

    return ('In the listing below, {} integer value.\n'
            'The operand table above shows how this corresponds '
            'to assembly syntax.\n\n'
            .format(op_str))


def render_insn(insn: Insn, impl: Optional[str], heading_level: int) -> str:
    '''Generate the documentation for an instruction

    heading_level is the Markdown heading level to use for the instruction
    and must be greater than zero. For example, if it is 3 then the
    instruction is introduced with "### <insn_name>" and its sections
    (Errors, Syntax, Operands, Encoding, Operation) get "####" headings.

    impl, if not None, is the text of the instruction's implementation. It is
    shown in a code block in the Operation section.

    '''
    assert heading_level > 0

    mnem = insn.mnemonic.upper()
    subhead = '#' * (heading_level + 1) + ' '

    # Heading, based on the upper-cased mnemonic
    parts = ['{} {}\n'.format('#' * heading_level, mnem)]

    # A note, if there is one, is rendered as a callout
    if insn.note is not None:
        parts += ['<div class="bd-callout bd-callout-warning">'
                  '<h5>Note</h5>\n\n',
                  insn.note,
                  '\n\n</div>\n\n']

    # Optional synopsis: bold-face text that expands the mnemonic into
    # something more understandable.
    if insn.synopsis is not None:
        parts.append('**{}.**\n'.format(insn.synopsis))

    # Optional documentation (already formatted as markdown), then a blank
    # line to separate what we have so far from the sections that follow.
    if insn.doc is not None:
        parts.append(insn.doc + '\n')
    parts.append('\n')

    # Say so if this instruction came from the RV32I instruction set.
    if insn.rv32i:
        parts.append('This instruction is defined in the '
                     'RV32I instruction set.\n\n')

    # The errors that the instruction might cause. An empty list means we
    # know that there are none; None means there is nothing to say.
    if insn.errs is not None:
        parts.append(subhead + 'Errors\n')
        parts += _render_errors(mnem, insn.errs)
        parts.append('\n')

    # Syntax example. The mnemonic is followed by a space unless the
    # instruction has glued_ops set.
    parts.append(subhead + 'Syntax\n')
    parts.append("```\n")
    parts.append(mnem + ('' if insn.glued_ops else ' '))
    parts.append(insn.syntax.render_doc())
    parts.append("\n```\n\n")

    is_pseudo = insn.literal_pseudo_op or insn.python_pseudo_op

    # If there is an encoding (and this is a real instruction), match up its
    # fields with the operands.
    o2e = None  # type: Optional[Dict[str, List[str]]]
    e2o = None  # type: Optional[Dict[int, str]]
    if not is_pseudo and insn.encoding is not None:
        o2e, e2o = name_op_enc_fields(insn.name_to_operand, insn.encoding)

    # The operand table is shown if there is at least one operand and this
    # isn't a pseudo-op.
    if insn.operands and not is_pseudo:
        parts.append(subhead + 'Operands\n')
        parts.append(render_operand_table(insn.operands, o2e))

    # The encoding is shown if we have one
    if e2o is not None:
        parts.append(subhead + 'Encoding\n')
        assert insn.encoding is not None
        parts.append(render_encoding(insn.mnemonic, insn.encoding, e2o))

    # A pseudo-op with a literal translation shows its expansion
    if insn.literal_pseudo_op is not None:
        parts.append(render_literal_pseudo_op(insn.literal_pseudo_op))

    if impl is not None:
        parts.append(subhead + 'Operation\n')

        # A handy reminder that enum operands and option operands are
        # referred to by their integer values (empty if there are none).
        parts.append(_render_int_operands_note(insn.operands))

        # Note: No newline is added after the inserted contents because
        # libcst (which is used for extracting the implementation) always
        # adds a trailing newline itself.
        parts.append('```\n'
                     '{}'
                     '```\n\n'
                     .format(impl))

    return ''.join(parts)
353
354
def render_insn_group(group: InsnGroup,
                      impls: Dict[str, str],
                      heading_level: int,
                      out_file: TextIO) -> None:
    '''Write the documentation for one group of instructions to out_file

    This writes the group's own documentation, followed by the documentation
    for each of its instructions (or a short message if it has none). impls
    is searched for each instruction's implementation, using the key formed
    by removing any '.' characters from the mnemonic and upper-casing it.

    '''
    # The group heading is not printed here: that is done in the top-level
    # documentation, so that it makes it into the TOC.
    out_file.write(group.doc + '\n\n')

    if group.insns:
        for insn in group.insns:
            impl_key = insn.mnemonic.replace('.', '').upper()
            rendered = render_insn(insn, impls.get(impl_key), heading_level)
            out_file.write(rendered)
    else:
        out_file.write('No instructions in group.\n\n')
Rupert Swarbrickd3de4bb2020-09-01 17:50:19 +0100372
373
def render_insns(insns: InsnsFile,
                 impls: Dict[str, str],
                 heading_level: int,
                 out_dir: str) -> None:
    '''Render documentation for all instructions

    Each instruction group gets a file of its own in out_dir, named after the
    group's key with '.md' appended.

    '''
    for insn_group in insns.groups.groups:
        md_name = insn_group.key + '.md'
        with open(os.path.join(out_dir, md_name), 'w') as md_file:
            render_insn_group(insn_group, impls, heading_level, md_file)
Rupert Swarbrick63da48e2020-07-08 10:16:57 +0100383
384
def main() -> int:
    '''Parse the command line and generate the documentation

    The command line takes three positional arguments: yaml_file (loaded with
    load_file), py_file (passed to read_implementation) and out_dir (the
    directory that the generated files are written to, which is created if
    necessary).

    Returns the exit status for the script: 0 on success, or 1 if the inputs
    could not be loaded or the output directory could not be created. In both
    failure cases a message is printed to stderr.

    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('yaml_file')
    parser.add_argument('py_file')
    parser.add_argument('out_dir')

    args = parser.parse_args()

    try:
        insns = load_file(args.yaml_file)
        impls = read_implementation(args.py_file)
    except RuntimeError as err:
        print(err, file=sys.stderr)
        return 1

    try:
        os.makedirs(args.out_dir, exist_ok=True)
    except OSError as err:
        # Without an output directory there is nowhere to write to, so stop
        # here rather than failing less helpfully on the first open().
        print('Failed to create output directory {!r}: {}.'
              .format(args.out_dir, err),
              file=sys.stderr)
        return 1

    render_insns(insns, impls, 2, args.out_dir)
    return 0
408
409
# When run as a script, use the value returned by main() as the exit status.
if __name__ == '__main__':
    sys.exit(main())