Dan McArdle | c6f1769 | 2022-06-22 14:53:42 -0400 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # Copyright lowRISC contributors. |
| 3 | # Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| 4 | # SPDX-License-Identifier: Apache-2.0 |
"""generate_compilation_db.py builds compile_commands.json from BUILD files.
| 6 | |
| 7 | This tool runs a Bazel Action Graph query (Bazel's "aquery" command) and |
| 8 | transforms the results to produce a compilation database (aka |
| 9 | compile_commands.json). The goal is to enable semantic features like |
| 10 | jump-to-definition and cross-references in IDEs that support |
| 11 | compile_commands.json. |
| 12 | |
| 13 | The analysis.ActionGraphContainer protobuf [0] defines aquery's results format. |
| 14 | Clang informally defines the schema of compile_commands.json [1]. |
| 15 | |
| 16 | Caveat: this tool only emits the commands for building C/C++ code. |
| 17 | |
| 18 | Example: |
| 19 | util/generate_compilation_db.py --target //sw/... --out compile_commands.json |
| 20 | |
Dan McArdle | 3842477 | 2022-10-10 15:00:31 -0400 | [diff] [blame] | 21 | Tip: If your IDE complains that it cannot find headers, e.g. "gmock/gmock.h", it |
| 22 | might be telling the truth. Try building the relevant target with Bazel |
| 23 | (specifying "--config=riscv32" as necessary) and then restart clangd. |
| 24 | |
Dan McArdle | c6f1769 | 2022-06-22 14:53:42 -0400 | [diff] [blame] | 25 | [0]: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto |
| 26 | [1]: https://clang.llvm.org/docs/JSONCompilationDatabase.html |
Dan McArdle | c6f1769 | 2022-06-22 14:53:42 -0400 | [diff] [blame] | 27 | """ |
| 28 | |
| 29 | import argparse |
| 30 | import json |
Dan McArdle | 3842477 | 2022-10-10 15:00:31 -0400 | [diff] [blame] | 31 | import logging |
Dan McArdle | c6f1769 | 2022-06-22 14:53:42 -0400 | [diff] [blame] | 32 | import os |
| 33 | import subprocess |
Alphan Ulusoy | c1308a7 | 2022-09-22 11:41:31 -0400 | [diff] [blame] | 34 | import sys |
Dan McArdle | 3842477 | 2022-10-10 15:00:31 -0400 | [diff] [blame] | 35 | from typing import Dict, List, Tuple |
| 36 | |
| 37 | logger = logging.getLogger('generate_compilation_db') |
Dan McArdle | c6f1769 | 2022-06-22 14:53:42 -0400 | [diff] [blame] | 38 | |
| 39 | |
def build_id_lookup_dict(dicts: List[Dict]):
    """Return a mapping from each entry's "id" value to the entry itself.

    If several entries in `dicts` share the same "id", the last one wins.
    """
    return {entry['id']: entry for entry in dicts}
| 46 | |
| 47 | |
class BazelAqueryResults:
    """Corresponds to Bazel's analysis.ActionGraphContainer protobuf.

    Parses the JSON text of an aquery result and indexes its dep sets,
    artifacts and path fragments by their "id" so they can be cross-referenced.
    """

    # Only paths ending in one of these suffixes are yielded by
    # `iter_artifacts_for_dep_sets()`. A tuple so it can be passed directly to
    # `str.endswith()`.
    _SOURCE_EXTENSIONS = (".h", ".c", ".cc")

    def __init__(self, output: str):
        """Parse `output`, the JSON document printed by the aquery command.

        Top-level keys are read with `.get()` so that a result lacking one of
        them (e.g. a query that matched no actions) is treated as empty
        instead of raising KeyError.
        """
        parsed = json.loads(output)
        self.actions = [
            BazelAqueryAction(action) for action in parsed.get('actions', [])
        ]
        self.dep_sets_ = build_id_lookup_dict(parsed.get('depSetOfFiles', []))
        self.artifacts_ = build_id_lookup_dict(parsed.get('artifacts', []))
        self.path_fragments_ = build_id_lookup_dict(
            parsed.get('pathFragments', []))

    def reconstruct_path(self, id: int) -> str:
        """Reconstruct a file path from Bazel aquery fragments.

        Starting at the fragment with the given `id`, follow "parentId" links
        until a fragment without a parent is reached, then join the collected
        labels from the outermost fragment down to the starting one.

        Raises KeyError if a referenced fragment id is unknown.
        """
        # The parameter keeps its original name `id` for keyword-argument
        # compatibility; a separate local is advanced while walking upwards.
        fragment_id = id
        labels = []

        while True:
            path_fragment = self.path_fragments_[fragment_id]
            labels.append(path_fragment['label'])

            if 'parentId' not in path_fragment:
                break
            fragment_id = path_fragment['parentId']

        # For our purposes, `os.sep.join()` should be equivalent to
        # `os.path.join()`, but without the additional overhead.
        return os.sep.join(reversed(labels))

    def iter_artifacts_for_dep_sets(self, dep_set_ids: List[int]):
        """Iterate the reconstructed paths of all artifacts related to `dep_set_ids`.

        Walks the given dep sets and, transitively, every dep set they
        reference. Paths that start with "external/" or that do not end in a
        C/C++ source or header suffix are skipped.

        `dep_set_ids` is not modified. Each dep set is expanded at most once,
        even when it is reachable through several parents.

        Raises KeyError if a referenced dep set, artifact or path fragment id
        is unknown.
        """
        # Work on a copy: popping from the caller's list would silently empty
        # it (e.g. an action's `input_dep_set_ids`).
        pending = list(dep_set_ids)
        visited = set()

        while pending:
            dep_set_id = pending.pop()
            if dep_set_id in visited:
                # Already expanded via another route; expanding it again would
                # only repeat paths that were yielded before.
                continue
            visited.add(dep_set_id)
            dep_set = self.dep_sets_[dep_set_id]

            for direct_artifact_id in dep_set.get('directArtifactIds', []):
                artifact = self.artifacts_[direct_artifact_id]
                path = self.reconstruct_path(artifact['pathFragmentId'])
                if path.startswith("external/"):
                    continue
                if not path.endswith(self._SOURCE_EXTENSIONS):
                    continue
                yield path

            pending.extend(dep_set.get('transitiveDepSetIds', []))
| 98 | |
| 99 | |
class BazelAqueryAction:
    """Corresponds to Bazel's analysis.Action protobuf."""

    def __init__(self, action: Dict):
        # Absent 'mnemonic' / 'arguments' keys are stored as None; an absent
        # 'inputDepSetIds' key is stored as an empty list.
        self.mnemonic = action.get('mnemonic')
        self.arguments = action.get('arguments')
        self.input_dep_set_ids = action.get('inputDepSetIds', [])

    def transform_arguments_for_clangd(self) -> List[str]:
        """Return the action's arguments, adjusted so Clangd can use them.

        Clangd does not seem to infer the intended target from the compiler
        name alone. Our cross-compiler, for example, lives at
        `external/crt/toolchains/lowrisc_rv32imcb/wrappers/clang`, and Clangd
        fails to launch a compiler instance when it then encounters
        `--march=rv32imc` or `--mabi=ilp32`.

        To work around this, a `--target=riscv32` flag is inserted directly
        after the compiler path whenever that path contains
        'lowrisc_rv32imcb'. In every other case (including missing or empty
        arguments) the stored arguments are returned unchanged.
        """
        original_args = self.arguments
        uses_cross_compiler = (bool(original_args) and
                               'lowrisc_rv32imcb' in original_args[0])
        if not uses_cross_compiler:
            return original_args
        return [original_args[0], '--target=riscv32'] + original_args[1:]
| 127 | |
Dan McArdle | c6f1769 | 2022-06-22 14:53:42 -0400 | [diff] [blame] | 128 | |
class PathBuilder:
    """Computes repository paths from the location of this script.

    Attributes set by the constructor:
      top_dir: the directory two levels above `script_path`.
      bazelisk_script: `bazelisk.sh` inside `top_dir`.
      bazel_exec_root: the `bazel-<name of top_dir>` entry inside `top_dir`.
    """

    def __init__(self, script_path):
        # The script is expected to sit in a util/ directory directly below
        # the top of the repository, hence two `dirname` steps.
        self.top_dir = os.path.dirname(os.path.dirname(script_path))
        if self.top_dir == '':
            raise Exception('Could not find parent of the util directory.')

        # Bazel creates a symlink to execRoot based on the workspace name.
        # https://bazel.build/remote/output-directories
        exec_root_link = 'bazel-' + os.path.basename(self.top_dir)

        self.bazelisk_script = os.path.join(self.top_dir, 'bazelisk.sh')
        self.bazel_exec_root = os.path.join(self.top_dir, exec_root_link)
| 142 | |
| 143 | |
def build_compile_commands(
        paths: PathBuilder,
        device_build: bool,
        target: str = '//...') -> Tuple[List[Dict], List[Dict]]:
    """Run `bazel aquery` and turn its CppCompile actions into DB entries.

    Args:
        paths: Provides the bazelisk script to run and the exec root that is
            recorded as each entry's "directory".
        device_build: When True, '--config=riscv32' is added to the query.
        target: Bazel target pattern to query. This used to be read from the
            module-level `args` object, which only exists when the file is run
            as a script; it is now an explicit parameter.

    Returns:
        A `(compile_commands, unittest_compile_commands)` tuple. Entries whose
        file ends in "_unittest.cc" go into the second list, all others into
        the first.

    Raises:
        subprocess.CalledProcessError: if the Bazel command exits with a
            non-zero status. Its stderr is echoed to this process's stderr
            before the exception propagates.
    """
    bazel_aquery_command = [
        paths.bazelisk_script,
        'aquery',
        '--output=jsonproto',
    ]
    if device_build:
        bazel_aquery_command.append('--config=riscv32')
    bazel_aquery_command.append(f'mnemonic("CppCompile", {target})')

    logger.info("Running bazel command: %s", bazel_aquery_command)
    try:
        completed_process = subprocess.run(bazel_aquery_command,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           check=True)
    except subprocess.CalledProcessError as e:
        # Decode leniently so that stray bytes in Bazel's output cannot hide
        # the real failure behind a UnicodeDecodeError.
        print(e.stderr.decode('utf-8', errors='replace'), file=sys.stderr)
        raise

    logger.info("Processing output from bazel aquery")
    aquery_results = BazelAqueryResults(
        completed_process.stdout.decode('utf-8'))

    compile_commands = []
    unittest_compile_commands = []
    for action in aquery_results.actions:
        # Sanity checks on Bazel's output: the query above only asks for
        # CppCompile actions.
        assert action.mnemonic == 'CppCompile', \
            f'unexpected action mnemonic: {action.mnemonic!r}'
        assert action.arguments != [], 'CppCompile action has no arguments'

        arguments = action.transform_arguments_for_clangd()

        # One entry per relevant input file of the action, all sharing the
        # action's argument list.
        for artifact in aquery_results.iter_artifacts_for_dep_sets(
                action.input_dep_set_ids):
            command = {
                'directory': paths.bazel_exec_root,
                'arguments': arguments,
                'file': artifact,
            }

            if artifact.endswith("_unittest.cc"):
                unittest_compile_commands.append(command)
            else:
                compile_commands.append(command)

    return (compile_commands, unittest_compile_commands)


def main(args):
    """Generate the compilation database for `args.target`.

    Queries Bazel twice (device and host configuration), merges the results,
    and writes them as JSON to `args.out`, or to stdout when `args.out` is
    empty or None.
    """
    paths = PathBuilder(os.path.realpath(__file__))

    device_commands, device_unittest_commands = build_compile_commands(
        paths, device_build=True, target=args.target)
    host_commands, host_unittest_commands = build_compile_commands(
        paths, device_build=False, target=args.target)

    # In case there are conflicting host and device commands for "*_unittest.cc"
    # sources, we strategically place the host commands first. Conversely, we
    # favor the device commands for non-test sources.
    all_compile_commands = (device_commands + host_commands +
                            host_unittest_commands + device_unittest_commands)

    compile_commands_json = json.dumps(all_compile_commands, indent=4)
    if not args.out:
        logger.info("Writing compile commands to stdout")
        print(compile_commands_json)
        return

    logger.info("Writing compile commands to %s", args.out)
    with open(args.out, 'w', encoding='utf-8') as output_file:
        output_file.write(compile_commands_json)
| 217 | |
| 218 | |
if __name__ == '__main__':
    # The module docstring doubles as the --help text; the raw formatter
    # keeps its line breaks intact.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        '--target',
        help='Bazel target. Default is "//...".',
        default='//...',
    )
    parser.add_argument(
        '--out',
        help='Path of output file for compilation DB. Defaults to stdout.',
    )

    # Invoked without any command-line arguments: show the usage text and
    # exit with a failure status instead of running the queries.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Timestamped log lines, with this tool's logger at DEBUG level.
    logging.basicConfig(format='%(asctime)s %(message)s')
    logger.setLevel(logging.DEBUG)

    args = parser.parse_args()
    main(args)