| #!/usr/bin/env python3 |
| # |
| # Copyright lowRISC contributors. |
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| # SPDX-License-Identifier: Apache-2.0 |
| |
| import argparse |
| import fnmatch |
| import logging |
| import subprocess |
| from itertools import groupby |
| from pathlib import Path |
| from types import SimpleNamespace |
| |
| import hjson |
| from tabulate import tabulate |
| |
| |
| class LicenceHeader(object): |
| """Represents the licence header we want to insert""" |
| def __init__(self, text): |
| self._lines = text.strip().splitlines() |
| |
| def __getitem__(self, idx): |
| return self._lines.__getitem__(idx) |
| |
| def __len__(self): |
| return self._lines.__len__() |
| |
| def numbered_lines(self, skip=0): |
| """Returns an iterator of (line_no, line_text). |
| |
| `line_no` counts from 1, and is for humans to count line numbers with. |
| use `skip_lines` to skip enumerating the first few lines. |
| """ |
| return enumerate(self._lines[skip:], start=1 + skip) |
| |
| @property |
| def first_word(self): |
| (first_word, _) = self._lines[0].split(' ', 1) |
| return first_word |
| |
| |
| class LineCommentStyle(object): |
| """Helpers for line-style comments.""" |
| def __init__(self, prefix): |
| self.comment_prefix = str(prefix) |
| self.first_line_prefix = self.comment_prefix |
| |
| def search_line(self, licence_first_word): |
| return self.comment_prefix + ' ' + licence_first_word |
| |
| def expected_full_line(self, licence_line): |
| return self.comment_prefix + ' ' + licence_line |
| |
| |
| class DifferentFirstLineCommentStyle(LineCommentStyle): |
| """Some files have a different allowable prefix for their first line.""" |
| def __init__(self, prefix, first_line_prefix): |
| LineCommentStyle.__init__(self, prefix) |
| self.first_line_prefix = first_line_prefix |
| |
| |
| class BlockCommentStyle(object): |
| """Helpers for block-style comments.""" |
| def __init__(self, prefix, suffix): |
| self.comment_prefix = str(prefix) |
| self.comment_suffix = str(suffix) |
| self.first_line_prefix = self.comment_prefix |
| |
| def search_line(self, licence_first_word): |
| return self.comment_prefix + ' ' + licence_first_word |
| |
| def expected_full_line(self, licence_line): |
| return self.comment_prefix + ' ' + licence_line + ' ' + self.comment_suffix |
| |
| |
| SLASH_SLASH = LineCommentStyle("//") |
| HASH = LineCommentStyle("#") |
| SLASH_STAR = BlockCommentStyle("/*", "*/") |
| |
| # (Priortised) Mapping of file name suffixes to CommentStyle object. |
| # If the suffix of your file does not match one of these, it will not be |
| # checked. |
| # |
| # These rules are given in priority order. Tuples of (extensions, style) higher |
| # in the list are matched before those later in the list, on purpose. |
| # |
| # Files that do not match any extension, or which have a style of `None` are |
| # not checked for a licence. |
| COMMENT_CHARS = [ |
| # Hardware Files |
| ([".svh", ".sv", ".sv.tpl"], SLASH_SLASH), # SystemVerilog |
| |
| # Hardware Build Systems |
| ([".tcl", ".sdc"], HASH), # tcl |
| ([".core", ".core.tpl"], |
| DifferentFirstLineCommentStyle("#", "CAPI=2")), # FuseSoC Core Files |
| (["Makefile", ".mk"], HASH), # Makefiles |
| ([".ys"], HASH), # Yosys script |
| ([".waiver"], HASH), # AscentLint waiver files |
| ([".vlt"], SLASH_SLASH), # Verilator configuration (waiver) files |
| ([".vbl"], HASH), # Verible configuration files |
| ([".el", ".el.tpl"], SLASH_SLASH), # Exclusion list |
| ([".f"], None), # File lists |
| |
| # The following two rules will inevitably bite us. |
| (["riviera_run.do"], HASH), # Riviera dofile |
| ([".do"], SLASH_SLASH), # Cadence LEC dofile |
| |
| # Software Files |
| ([".c", ".c.tpl", ".h", ".h.tpl", ".cc", ".cpp"], SLASH_SLASH), # C, C++ |
| ([".S"], SLASH_SLASH), # Assembly |
| ([".ld", ".ld.tpl"], SLASH_STAR), # Linker Scripts |
| ([".rs"], SLASH_SLASH), # Rust |
| |
| # Software Build Systems |
| (["meson.build", "toolchain.txt", "meson_options.txt"], HASH), # Meson |
| |
| # General Tooling |
| ([".py"], HASH), # Python |
| ([".sh"], HASH), # Shell Scripts |
| (["Dockerfile"], HASH), # Dockerfiles |
| |
| # Configuration |
| ([".hjson", ".hjson.tpl"], SLASH_SLASH), # hjson |
| ([".yml", ".yaml"], HASH), # YAML |
| ([".toml"], HASH), # TOML |
| (["-requirements.txt"], HASH), # Apt and Python requirements files |
| (["redirector.conf"], HASH), # nginx config |
| |
| # Documentation |
| ([".md", ".md.tpl", ".html"], None), # Markdown and HTML |
| ([".css"], SLASH_STAR), # CSS |
| ([".scss"], SLASH_SLASH), # SCSS |
| |
| # Templates (Last because there are overlaps with extensions above) |
| ([".tpl"], HASH), # Mako templates |
| ] |
| |
| |
| def detect_comment_char(filename): |
| for (suffixes, commentstyle) in COMMENT_CHARS: |
| for suffix in suffixes: |
| if filename.endswith(suffix): |
| return commentstyle |
| |
| return None |
| |
| |
| def git_find_repo_toplevel(): |
| git_output = subprocess.check_output( |
| ['git', 'rev-parse', '--show-toplevel']) |
| return Path(git_output.decode().strip()).resolve() |
| |
| |
| def git_find_all_file_paths(top_level, search_paths): |
| git_output = subprocess.check_output( |
| ["git", "-C", |
| str(top_level), "ls-files", "-z", "--", *search_paths]) |
| for path in git_output.rstrip(b"\0").split(b"\0"): |
| yield Path(top_level, path.decode()) |
| |
| |
| class ResultsTracker(object): |
| """Helper for tracking results""" |
| def __init__(self, base_dir): |
| self.base_dir = base_dir |
| |
| passed_count = 0 |
| failed_count = 0 |
| excluded_count = 0 |
| skipped_count = 0 |
| |
| failing_paths = set() |
| |
| @property |
| def total_count(self): |
| return self.passed_count + self.failed_count + self.skipped_count + self.excluded_count |
| |
| def passed(self, path, line_no, reason): |
| rel_path = path.relative_to(self.base_dir) |
| logging.debug("%s:%d PASSED: %s", str(rel_path), line_no, reason) |
| self.passed_count += 1 |
| |
| def failed(self, path, line_no, reason): |
| rel_path = path.relative_to(self.base_dir) |
| logging.error("%s:%d FAILED: %s", str(rel_path), line_no, reason) |
| self.failing_paths.add(rel_path) |
| self.failed_count += 1 |
| |
| def skipped(self, path, reason): |
| rel_path = path.relative_to(self.base_dir) |
| logging.info("%s: SKIPPED: %s", str(rel_path), reason) |
| self.skipped_count += 1 |
| |
| def excluded(self, path, reason): |
| rel_path = path.relative_to(self.base_dir) |
| logging.debug("%s: EXCLUDED: %s", str(rel_path), reason) |
| self.excluded_count += 1 |
| |
| def any_failed(self): |
| return self.failed_count > 0 |
| |
| def display_nicely(self): |
| headers = ["Results:", "Files"] |
| results = [["Passed", self.passed_count], |
| ["Failed", self.failed_count], |
| ["Skipped", self.skipped_count], |
| ["Excluded", self.excluded_count], |
| ["Total", self.total_count]] |
| |
| return tabulate(results, headers, tablefmt="simple") |
| |
| |
| def matches_exclude_pattern(config, file_path): |
| rel_path = str(file_path.relative_to(config.base_dir)) |
| for exclude_pattern in config.exclude_paths: |
| if fnmatch.fnmatch(rel_path, exclude_pattern): |
| return True |
| return False |
| |
| |
| def check_paths(config, git_paths): |
| results = ResultsTracker(config.base_dir) |
| |
| for filepath in git_find_all_file_paths(config.base_dir, git_paths): |
| # Skip symlinks (with message) |
| if filepath.is_symlink(): |
| results.excluded(filepath, "File is a symlink") |
| continue |
| |
| # Skip non-file |
| if not filepath.is_file(): |
| continue |
| |
| # Skip exclude patterns |
| if matches_exclude_pattern(config, filepath): |
| results.excluded(filepath, "Path matches exclude pattern") |
| continue |
| |
| check_file_for_licence(config.licence, results, filepath) |
| |
| return results |
| |
| |
| def check_file_for_licence(licence, results, filepath): |
| comment_style = detect_comment_char(filepath.name) |
| |
| if comment_style is None: |
| results.skipped(filepath, "Unknown comment style") |
| return |
| |
| if filepath.stat().st_size == 0: |
| results.skipped(filepath, "Empty file") |
| return |
| |
| def next_line(file, line_no): |
| return (next(file).rstrip(), line_no + 1) |
| |
| with filepath.open() as f: |
| licence_assumed_start = None |
| |
| # Get first line |
| try: |
| line, line_no = next_line(f, 0) |
| except StopIteration: |
| results.failed(filepath, 1, "Empty file") |
| return |
| |
| licence_search_marker = comment_style.search_line(licence.first_word) |
| |
| # Check first line against the first word of licence, or against a |
| # possible different first line. |
| if not line.startswith(licence_search_marker): |
| if not line.startswith(comment_style.first_line_prefix): |
| results.failed(filepath, line_no, |
| "File does not start with comment") |
| return |
| |
| try: |
| line, line_no = next_line(f, line_no) |
| except StopIteration: |
| results.failed(filepath, line_no, |
| "Reached end of file before finding licence") |
| return |
| |
| # Skip lines that don't seem to be the first line of the licence |
| while not line.startswith(licence_search_marker): |
| try: |
| line, line_no = next_line(f, line_no) |
| except StopIteration: |
| results.failed(filepath, line_no, |
| "Reached end of file before finding licence") |
| return |
| |
| if not line.startswith(comment_style.comment_prefix): |
| results.failed(filepath, line_no, |
| "First comment ended before licence notice") |
| return |
| |
| # We found the marker, so we found the first line of the licence. |
| # The current line is in the first comment, so check the line matches the |
| # expected first line: |
| licence_assumed_start = line_no |
| if line != comment_style.expected_full_line(licence[0]): |
| results.failed(filepath, line_no, "Licence does not match") |
| return |
| |
| for (licence_line_no, licence_line) in licence.numbered_lines(skip=1): |
| try: |
| line, line_no = next_line(f, line_no) |
| except StopIteration: |
| results.failed(filepath, line_no, |
| "Reached end of file before finding licence") |
| return |
| |
| # Check against full expected line. |
| if line != comment_style.expected_full_line(licence_line): |
| results.failed(filepath, line_no, "Licence did not match") |
| return |
| |
| results.passed(filepath, licence_assumed_start, "Licence found") |
| |
| |
| def main(): |
| parser = argparse.ArgumentParser( |
| description= |
| "A tool to check the lowRISC licence header is in each source file") |
| parser.add_argument("--config", |
| metavar="config.hjson", |
| type=argparse.FileType('r', encoding='UTF-8'), |
| help="HJSON file to read for licence configuration.") |
| parser.add_argument("paths", |
| metavar="path", |
| nargs='*', |
| default=["."], |
| help="Paths to check for licence headers.") |
| parser.add_argument('-v', |
| "--verbose", |
| action='store_true', |
| dest='verbose', |
| help="Verbose output") |
| |
| options = parser.parse_args() |
| |
| if options.verbose: |
| logging.basicConfig(format="%(levelname)s: %(message)s", |
| level=logging.INFO) |
| else: |
| logging.basicConfig(format="%(levelname)s: %(message)s") |
| |
| config = SimpleNamespace() |
| config.base_dir = git_find_repo_toplevel() |
| |
| parsed_config = hjson.load(options.config) |
| |
| config.licence = LicenceHeader(parsed_config['licence']) |
| config.exclude_paths = set(parsed_config['exclude_paths']) |
| |
| results = check_paths(config, options.paths) |
| |
| print(results.display_nicely()) |
| |
| if results.any_failed(): |
| print("Failed:") |
| for path in results.failing_paths: |
| print(" {}".format(str(path))) |
| print("") |
| exit(1) |
| else: |
| exit(0) |
| |
| |
| if __name__ == '__main__': |
| main() |