[util] Add util/generate_compilation_db.py
This script generates compile_commands.json.
Signed-off-by: Dan McArdle <dmcardle@google.com>
diff --git a/util/generate_compilation_db.py b/util/generate_compilation_db.py
new file mode 100755
index 0000000..19587ed
--- /dev/null
+++ b/util/generate_compilation_db.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+"""generate_compilation_db.py builds compile_commands.json from BUILD files.
+
+This tool runs a Bazel Action Graph query (Bazel's "aquery" command) and
+transforms the results to produce a compilation database (aka
+compile_commands.json). The goal is to enable semantic features like
+jump-to-definition and cross-references in IDEs that support
+compile_commands.json.
+
+The analysis.ActionGraphContainer protobuf [0] defines aquery's results format.
+Clang informally defines the schema of compile_commands.json [1].
+
+Caveat: this tool only emits the commands for building C/C++ code.
+
+Example:
+ util/generate_compilation_db.py --target //sw/... --out compile_commands.json
+
+[0]: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto
+[1]: https://clang.llvm.org/docs/JSONCompilationDatabase.html
+
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+
+
+def build_id_lookup_dict(dicts: list[dict]):
+    """Index a sequence of dicts by each entry's "id" value.
+
+    Args:
+        dicts: Dicts that each contain an "id" key.
+
+    Returns:
+        A dict mapping every "id" value to the dict it came from. When an id
+        occurs more than once, the entry that appears last in `dicts` wins.
+
+    Raises:
+        KeyError: If an entry has no "id" key.
+    """
+    return {entry['id']: entry for entry in dicts}
+
+
+class BazelAqueryResults:
+    """Corresponds to Bazel's analysis.ActionGraphContainer protobuf.
+
+    Attributes:
+        actions: One BazelAqueryAction per entry of the "actions" field.
+        dep_sets_: The "depSetOfFiles" entries, indexed by their "id".
+        artifacts_: The "artifacts" entries, indexed by their "id".
+        path_fragments_: The "pathFragments" entries, indexed by their "id".
+    """
+
+    def __init__(self, output: str):
+        """Parse the JSON form of an ActionGraphContainer.
+
+        Args:
+            output: JSON text, e.g. the stdout of
+                `bazel aquery --output=jsonproto`.
+
+        Raises:
+            json.JSONDecodeError: If `output` is not valid JSON.
+        """
+        parsed = json.loads(output)
+        # Protobuf's JSON encoding can leave out empty repeated fields (for
+        # instance when a query matches no actions), so a missing key is
+        # treated as an empty list instead of raising KeyError.
+        self.actions = [
+            BazelAqueryAction(action) for action in parsed.get('actions', [])
+        ]
+        self.dep_sets_ = build_id_lookup_dict(parsed.get('depSetOfFiles', []))
+        self.artifacts_ = build_id_lookup_dict(parsed.get('artifacts', []))
+        self.path_fragments_ = build_id_lookup_dict(
+            parsed.get('pathFragments', []))
+
+    def reconstruct_path(self, id: int):
+        """Reconstruct a file path from Bazel aquery fragments.
+
+        Every path fragment carries one path component ("label") and, unless
+        it is the outermost component, the id of its parent ("parentId").
+
+        Args:
+            id: Id of the fragment that holds the last path component.
+
+        Returns:
+            The components joined with os.path.join, outermost first.
+
+        Raises:
+            KeyError: If a referenced fragment id is unknown.
+        """
+        labels = []
+        fragment_id = id
+
+        # Walk from the leaf towards the root, collecting components in
+        # leaf-to-root order; they are reversed when joined below.
+        while True:
+            path_fragment = self.path_fragments_[fragment_id]
+            labels.append(path_fragment['label'])
+
+            if 'parentId' not in path_fragment:
+                break
+            fragment_id = path_fragment['parentId']
+
+        return os.path.join(*reversed(labels))
+
+    def iter_artifacts_for_dep_sets(self, dep_set_ids: list[int]):
+        """Iterate the reconstructed paths of all artifacts related to `dep_set_ids`.
+
+        Visits the given dep sets and, transitively, every dep set they
+        reference. `dep_set_ids` itself is left unmodified, and each dep set
+        is expanded at most once even if it is reachable via several parents.
+
+        Args:
+            dep_set_ids: Ids of the dep sets to start from.
+
+        Yields:
+            The reconstructed path of each direct artifact of every visited
+            dep set.
+
+        Raises:
+            KeyError: If a referenced dep set, artifact or fragment id is
+                unknown.
+        """
+        # Work on a copy: popping from the caller's list would empty it.
+        pending = list(dep_set_ids)
+        visited = set()
+
+        while pending:
+            dep_set_id = pending.pop()
+            if dep_set_id in visited:
+                # Already expanded through another parent; expanding it again
+                # would only repeat the same paths.
+                continue
+            visited.add(dep_set_id)
+            dep_set = self.dep_sets_[dep_set_id]
+
+            for direct_artifact_id in dep_set.get('directArtifactIds', []):
+                artifact = self.artifacts_[direct_artifact_id]
+                yield self.reconstruct_path(artifact['pathFragmentId'])
+
+            pending.extend(dep_set.get('transitiveDepSetIds', []))
+
+
+class BazelAqueryAction:
+    """Corresponds to Bazel's analysis.Action protobuf.
+
+    Attributes:
+        mnemonic: The action's "mnemonic" value, or None if absent.
+        arguments: The action's "arguments" list; empty if absent.
+        input_dep_set_ids: The action's "inputDepSetIds" list; empty if
+            absent.
+    """
+
+    def __init__(self, action: dict):
+        """Extract the fields of interest from one decoded action.
+
+        Args:
+            action: One entry of the "actions" list of the aquery results.
+        """
+        self.mnemonic = action.get('mnemonic', None)
+        # Default to an empty list rather than None so the attribute can
+        # always be indexed, iterated and compared against [].
+        self.arguments = action.get('arguments', [])
+        self.input_dep_set_ids = action.get('inputDepSetIds', [])
+
+    def hackily_hardcode_googletest_includes(self):
+        """Add the include paths for googletest (gtest and gmock) to `self.arguments`.
+
+        Only command lines whose first argument ends with "gcc" are touched.
+        The two hard-coded include directories are inserted as `-iquote`
+        options immediately before the first existing argument that starts
+        with "-iquote". If there is no such argument, or no arguments at all,
+        nothing changes. Calling this repeatedly inserts the options again.
+        """
+        if not self.arguments or not self.arguments[0].endswith('gcc'):
+            return
+
+        googletest_includes = [
+            '-iquote',
+            'bazel-opentitan/external/googletest/googletest/include',
+            '-iquote',
+            'bazel-opentitan/external/googletest/googlemock/include',
+        ]
+        for index, arg in enumerate(self.arguments):
+            if arg.startswith('-iquote'):
+                # Build a new list instead of inserting in place so other
+                # holders of the original list do not see it change.
+                self.arguments = (self.arguments[:index] +
+                                  googletest_includes +
+                                  self.arguments[index:])
+                return
+
+
+def main(args):
+    """Query Bazel's action graph and emit a compilation database.
+
+    Runs `bazelisk.sh aquery --output=jsonproto <target>` from the directory
+    above this script's directory, then emits one compilation database entry
+    per input artifact of every CppCompile action that has arguments.
+
+    Args:
+        args: Parsed command-line arguments. `args.target` is the Bazel
+            target pattern to query. `args.out` is the output file path;
+            when it is falsy the JSON is printed to stdout instead.
+
+    Raises:
+        subprocess.CalledProcessError: If the aquery command exits with a
+            non-zero status.
+    """
+    script_path = os.path.realpath(__file__)
+    utils_dir = os.path.dirname(script_path)
+    top_dir = os.path.dirname(utils_dir)
+
+    bazel_aquery_command = [
+        os.path.join(top_dir, 'bazelisk.sh'),
+        'aquery',
+        '--output=jsonproto',
+        args.target,
+    ]
+    try:
+        completed_process = subprocess.run(bazel_aquery_command,
+                                           capture_output=True,
+                                           check=True,
+                                           text=True)
+    except subprocess.CalledProcessError as err:
+        # stderr was captured, so the exception alone only shows the exit
+        # status. Relay what the command printed before propagating.
+        sys.stderr.write(err.stderr or '')
+        raise
+    aquery_results = BazelAqueryResults(completed_process.stdout)
+
+    compile_commands = []
+    for action in aquery_results.actions:
+        # `not action.arguments` covers both an empty list and None.
+        if action.mnemonic != 'CppCompile' or not action.arguments:
+            continue
+
+        action.hackily_hardcode_googletest_includes()
+
+        for artifact in aquery_results.iter_artifacts_for_dep_sets(
+                action.input_dep_set_ids):
+            compile_commands.append({
+                'directory': top_dir,
+                'arguments': action.arguments,
+                'file': artifact,
+            })
+
+    compile_commands_json = json.dumps(compile_commands,
+                                       sort_keys=True,
+                                       indent=4)
+    if not args.out:
+        print(compile_commands_json)
+        return
+    with open(args.out, 'w') as output_file:
+        output_file.write(compile_commands_json)
+
+
+if __name__ == '__main__':
+    # Script entry point: describe the command-line interface, using the
+    # module docstring verbatim as the help text, then pass the parsed
+    # arguments to main().
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.RawTextHelpFormatter,
+        description=__doc__,
+    )
+    cli.add_argument(
+        '--target',
+        help='Bazel target. Default is "//...".',
+        default='//...',
+    )
+    cli.add_argument(
+        '--out',
+        help='Path of output file for compilation DB. Defaults to stdout.',
+    )
+
+    main(cli.parse_args())