blob: 5237da45980b14450dd66b916200d9ffe015ea18 [file] [log] [blame]
#!/usr/bin/env python3
#
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
import argparse
import fnmatch
import logging
import re
import subprocess
from pathlib import Path
from types import SimpleNamespace
import hjson
from tabulate import tabulate
class LicenceHeader(object):
    """Represents the licence header we want to insert.

    The header text is stripped of surrounding whitespace and split into
    lines. The object then behaves like a read-only sequence of those lines
    (it supports indexing, `len()` and iteration).
    """

    def __init__(self, text):
        """Split `text` into the individual lines of the licence."""
        self._lines = text.strip().splitlines()

    def __getitem__(self, idx):
        """Return the licence line (or slice of lines) at `idx`."""
        return self._lines[idx]

    def __len__(self):
        """Return the number of lines in the licence."""
        return len(self._lines)

    def numbered_lines(self, skip=0):
        """Returns an iterator of (line_no, line_text).

        `line_no` counts from 1, and is for humans to count line numbers with.
        Use `skip` to skip enumerating the first few lines.
        """
        return enumerate(self._lines[skip:], start=1 + skip)

    @property
    def first_word(self):
        """The text of the first licence line up to its first space.

        If the first line contains no space, the whole line is returned. An
        empty licence has no words, so the result is the empty string.
        """
        if not self._lines:
            return ''
        # Take element 0 rather than unpacking into two names: unpacking
        # fails when the first line consists of a single word.
        return self._lines[0].split(' ', 1)[0]
class CommentStyle:
    '''Base class for comment style objects.

    A style records the prefix allowed on the first line of a file
    (`first_line_prefix`) and the prefix that starts every comment line
    (`comment_prefix`), and builds the regexes used to recognise licence
    lines written in that style.
    '''

    def __init__(self, first_line_prefix, comment_prefix):
        self.first_line_prefix = first_line_prefix
        self.comment_prefix = comment_prefix

    def search_line_pattern(self, licence_first_word):
        '''Returns a regex for the comment prefix, a space, then the word.

        The whole marker is escaped, so it is matched literally.
        '''
        marker = ' '.join((self.comment_prefix, licence_first_word))
        return re.compile(re.escape(marker))

    def full_line_parts(self, licence_line):
        '''Returns the regex fragments that make up one commented line.

        Only the comment prefix is escaped here; `licence_line` is used as
        given, so the caller decides whether it is a literal or a regex.
        '''
        escaped_prefix = re.escape(self.comment_prefix)
        return [escaped_prefix, licence_line]

    def full_line_pattern(self, licence_line):
        '''Returns a regex pattern which matches one line of licence text.'''
        fragments = self.full_line_parts(licence_line)
        return re.compile(' '.join(fragments))
class LineCommentStyle(CommentStyle):
    """Line-style comments: one prefix is used for every line.

    The same `prefix` is registered both as the first-line prefix and as the
    ordinary comment prefix.
    """

    def __init__(self, prefix):
        super().__init__(first_line_prefix=prefix, comment_prefix=prefix)
class DifferentFirstLineCommentStyle(CommentStyle):
    """Comment style for files whose first line may use a different prefix.

    `first_line_prefix` is the prefix accepted on the first line of the file;
    `prefix` is the comment prefix used for the remaining lines.
    """

    def __init__(self, first_line_prefix, prefix):
        super().__init__(first_line_prefix=first_line_prefix,
                         comment_prefix=prefix)
class BlockCommentStyle(CommentStyle):
    """Block-style comments: each line has both a prefix and a suffix."""

    def __init__(self, prefix, suffix):
        super().__init__(first_line_prefix=prefix, comment_prefix=prefix)
        self.comment_suffix = str(suffix)

    def full_line_parts(self, licence_line):
        """Returns the escaped prefix, the licence line, the escaped suffix.

        As in the base class, `licence_line` itself is passed through
        untouched.
        """
        opener = re.escape(self.comment_prefix)
        closer = re.escape(self.comment_suffix)
        return [opener, licence_line, closer]
# Names of the supported comment styles. Each name doubles as the comment
# prefix of its style.
SLASH_SLASH = '//'
HASH = '#'
SLASH_STAR = '/*'

# Mapping from style name to the object describing that comment style.
COMMENT_STYLES = {
    SLASH_SLASH: LineCommentStyle(SLASH_SLASH),
    HASH: LineCommentStyle(HASH),
    SLASH_STAR: BlockCommentStyle(SLASH_STAR, "*/"),
    'corefile': DifferentFirstLineCommentStyle("CAPI=2", HASH),
}
# (Prioritised) Mapping of file name suffixes to comment style. If the suffix
# of your file does not match one of these, it will not be checked.
#
# Each entry is a pair (suffixes, styles). suffixes is a list of file suffixes:
# if a filename matches one of these suffixes, we'll use the styles in styles.
# styles is either a single string or a list of strings. Every string must be
# a key of COMMENT_STYLES; together they give the different comment styles
# that are acceptable for the file type.
#
# These rules are given in priority order. Tuples higher in the list are
# matched before those later in the list, on purpose.
#
# Files that either do not match any extension or that have an empty list of
# styles are not checked for a licence.
# Consumed by detect_comment_char, which walks this list from the top and uses
# the first entry that has a suffix the file name ends with. Suffixes are
# compared with str.endswith, so an entry such as "Makefile" matches any file
# name ending in that text, not just a file extension.
COMMENT_CHARS = [
    # Hardware Files
    ([".svh", ".sv", ".sv.tpl"], SLASH_SLASH), # SystemVerilog
    # Hardware Build Systems
    ([".tcl", ".sdc"], HASH), # tcl
    ([".core", ".core.tpl"], 'corefile'), # FuseSoC Core Files
    (["Makefile", ".mk"], HASH), # Makefiles
    ([".ys"], HASH), # Yosys script
    ([".waiver"], HASH), # AscentLint waiver files
    ([".vlt"], SLASH_SLASH), # Verilator configuration (waiver) files
    ([".vbl"], HASH), # Verible configuration files
    ([".el", ".el.tpl"], SLASH_SLASH), # Exclusion list
    # Two acceptable styles: a file passes if either one matches.
    ([".cfg", ".cfg.tpl"], [SLASH_SLASH,
    HASH]), # Kinds of configuration files
    ([".f"], []), # File lists (not checked)
    # The following two rules will inevitably bite us.
    # "riviera_run.do" also ends in ".do", so it has to be listed first to
    # take priority over the generic ".do" rule.
    (["riviera_run.do"], HASH), # Riviera dofile
    ([".do"], SLASH_SLASH), # Cadence LEC dofile
    # Software Files
    ([".c", ".c.tpl", ".h", ".h.tpl", ".cc", ".cpp"], SLASH_SLASH), # C, C++
    ([".def"], SLASH_SLASH), # C, C++ X-Include List Declaration Files
    ([".S"], [SLASH_SLASH, SLASH_STAR]), # Assembly (With Preprocessing)
    ([".s"], SLASH_STAR), # Assembly (Without Preprocessing)
    ([".ld", ".ld.tpl"], SLASH_STAR), # Linker Scripts
    ([".rs", ".rs.tpl"], SLASH_SLASH), # Rust
    # Software Build Systems
    (["meson.build", "toolchain.txt", "meson_options.txt"], HASH), # Meson
    # General Tooling
    ([".py"], HASH), # Python
    ([".sh"], HASH), # Shell Scripts
    (["Dockerfile"], HASH), # Dockerfiles
    # Configuration
    ([".hjson", ".hjson.tpl"], SLASH_SLASH), # hjson
    ([".yml", ".yaml"], HASH), # YAML
    ([".toml"], HASH), # TOML
    (["-requirements.txt"], HASH), # Apt and Python requirements files
    (["redirector.conf"], HASH), # nginx config
    # Documentation
    ([".md", ".md.tpl", ".html"], []), # Markdown and HTML (not checked)
    ([".css"], SLASH_STAR), # CSS
    ([".scss"], SLASH_SLASH), # SCSS
    # Templates (Last because there are overlaps with extensions above)
    # The more specific "*.tpl" suffixes listed earlier win over this rule.
    ([".tpl"], HASH), # Mako templates
]
class LicenceMatcher:
    '''An object to match a given licence at the start of a file.

    The matcher holds one compiled regex per licence line (`expected_lines`)
    plus a `search_marker` regex used to spot the first line of the licence.
    Call `start()` and then feed consecutive file lines to `take_line()`.
    '''

    def __init__(self, comment_style, licence, match_regex):
        '''Build the per-line patterns for `licence` in `comment_style`.

        comment_style provides `full_line_pattern`, `search_line_pattern`,
        `first_line_prefix` and `comment_prefix`. licence is an iterable of
        licence lines which also has a `first_word` attribute (only read when
        match_regex is false). If match_regex is true, every licence line is
        treated as a regular expression; otherwise it is matched literally.

        Raises RuntimeError if match_regex is true and a licence line is not
        a valid regular expression.
        '''
        self.style = comment_style
        self.expected_lines = []

        if match_regex:
            # In case we are using regex matching we can pass the full line
            # "as is". Lines are numbered from 1 because the number is only
            # used in the message shown to the user.
            for line_no, ll in enumerate(licence, start=1):
                try:
                    pattern = comment_style.full_line_pattern(ll)
                except re.error as e:
                    # Catch any regex error here and raise a runtime error.
                    raise RuntimeError(
                        "Can't compile line {} of the licence as a regular "
                        "expression. Saw `{}`: {}".format(
                            line_no, self._offending_text(e), e.msg)) from e
                self.expected_lines.append(pattern)

            # use the "first line" as a licence marker
            self.search_marker = self.expected_lines[0]
        else:
            # For non-regex matching we need to escape everything. This can
            # never throw an exception as everything has been escaped and
            # therefore is always a legal regex.
            self.search_marker = comment_style.search_line_pattern(
                licence.first_word)
            self.expected_lines = [
                comment_style.full_line_pattern(re.escape(ll))
                for ll in licence
            ]

        self.lines_left = []

    @staticmethod
    def _offending_text(err):
        '''Describe the character at which the regex error `err` occurred.

        `err.pos` may be None or may point one past the end of the pattern
        (for errors such as an unexpected end of pattern), so it cannot be
        used as an index without checking it first.
        '''
        pattern, pos = err.pattern, err.pos
        if pattern is None or pos is None:
            return '<unknown>'
        if 0 <= pos < len(pattern):
            return pattern[pos]
        return '<end of pattern>'

    def looks_like_first_line_comment(self, line):
        '''True if line starts with the style's first-line prefix.'''
        return line.startswith(self.style.first_line_prefix)

    def looks_like_comment(self, line):
        '''True if line starts with the style's comment prefix.'''
        return line.startswith(self.style.comment_prefix)

    def looks_like_first_line(self, line):
        '''True if the search marker matches at the start of line.'''
        return self.search_marker.match(line) is not None

    def start(self):
        '''Reset lines_left, to match at the start of the licence'''
        self.lines_left = self.expected_lines

    def take_line(self, line):
        '''Check whether line matches the next line of the licence.

        Returns a pair (matched, done). matched is true if the line matched.
        If this was the last line of the licence, done is true. On a match,
        the matched pattern is dropped from lines_left, so the next call to
        take_line will match against the next line of the licence.
        '''
        # If we have no more lines to match, claim a match and that we're
        # done. This shouldn't happen in practice, except if the
        # configuration has an empty licence.
        if not self.lines_left:
            return (True, True)

        if self.lines_left[0].fullmatch(line) is None:
            return (False, False)

        # Rebind to a new list instead of popping in place: after start(),
        # lines_left is the very same list object as expected_lines.
        self.lines_left = self.lines_left[1:]
        return (True, not self.lines_left)
def detect_comment_char(all_matchers, filename):
    '''Find zero or more LicenceMatcher objects for filename

    all_matchers should be a dict like COMMENT_STYLES, but where the values
    are the corresponding LicenceMatcher objects.

    The first entry of COMMENT_CHARS with a suffix that filename ends with
    decides the result. An empty list is returned when no entry matches or
    when the matching entry lists no styles.
    '''
    style_keys = next(
        (keys for suffixes, keys in COMMENT_CHARS
         if filename.endswith(tuple(suffixes))),
        None)

    if style_keys is None:
        return []

    # A lone style name is shorthand for a one-element list.
    if not isinstance(style_keys, list):
        assert isinstance(style_keys, str)
        style_keys = [style_keys]

    return [all_matchers[name] for name in style_keys]
def git_find_repo_toplevel():
    '''Return the resolved path printed by `git rev-parse --show-toplevel`.

    git is run without an explicit working directory. Raises
    subprocess.CalledProcessError if git exits with a non-zero status.
    '''
    raw_output = subprocess.check_output(
        ['git', 'rev-parse', '--show-toplevel'])
    toplevel = raw_output.decode().strip()
    return Path(toplevel).resolve()
def git_find_all_file_paths(top_level, search_paths):
    '''Yield a Path for every file `git ls-files` lists under search_paths.

    git is run with `-C top_level`, and each yielded path is joined onto
    top_level. Nothing is yielded when git lists no files. Raises
    subprocess.CalledProcessError if git exits with a non-zero status.
    '''
    git_output = subprocess.check_output(
        ["git", "-C",
         str(top_level), "ls-files", "-z", "--", *search_paths])

    # With -z every entry is terminated by a NUL byte, so splitting leaves an
    # empty trailing chunk (and empty output splits to a single empty chunk).
    # Skip those: Path(top_level, "") would be the top-level directory itself.
    for raw_path in git_output.split(b"\0"):
        if raw_path:
            yield Path(top_level, raw_path.decode())
class ResultsTracker(object):
    """Helper for tracking results.

    Counts how many files passed, failed, were skipped or were excluded, and
    remembers the paths (relative to `base_dir`) of the failing files. Each
    outcome is also reported through the `logging` module.
    """

    def __init__(self, base_dir):
        self.base_dir = base_dir
        # All state is per-instance. In particular the set of failing paths
        # must not live on the class, where every tracker would share (and
        # mutate) the same set object.
        self.passed_count = 0
        self.failed_count = 0
        self.excluded_count = 0
        self.skipped_count = 0
        self.failing_paths = set()

    @property
    def total_count(self):
        """Sum of the passed, failed, skipped and excluded counts."""
        return (self.passed_count + self.failed_count + self.skipped_count +
                self.excluded_count)

    def passed(self, path, line_no, reason):
        """Record (and log at debug level) that `path` passed."""
        rel_path = path.relative_to(self.base_dir)
        logging.debug("%s:%d PASSED: %s", str(rel_path), line_no, reason)
        self.passed_count += 1

    def failed(self, path, line_no, reason):
        """Record (and log at error level) that `path` failed."""
        rel_path = path.relative_to(self.base_dir)
        logging.error("%s:%d FAILED: %s", str(rel_path), line_no, reason)
        self.failing_paths.add(rel_path)
        self.failed_count += 1

    def skipped(self, path, reason):
        """Record (and log at info level) that `path` was skipped."""
        rel_path = path.relative_to(self.base_dir)
        logging.info("%s: SKIPPED: %s", str(rel_path), reason)
        self.skipped_count += 1

    def excluded(self, path, reason):
        """Record (and log at debug level) that `path` was excluded."""
        rel_path = path.relative_to(self.base_dir)
        logging.debug("%s: EXCLUDED: %s", str(rel_path), reason)
        self.excluded_count += 1

    def any_failed(self):
        """True if at least one failure has been recorded."""
        return self.failed_count > 0

    def display_nicely(self):
        """Return the counts formatted as a table by `tabulate`."""
        headers = ["Results:", "Files"]
        results = [["Passed", self.passed_count],
                   ["Failed", self.failed_count],
                   ["Skipped", self.skipped_count],
                   ["Excluded", self.excluded_count],
                   ["Total", self.total_count]]
        return tabulate(results, headers, tablefmt="simple")
def matches_exclude_pattern(config, file_path):
    '''True if file_path matches one of the configured exclude patterns.

    The path is made relative to config.base_dir and compared against each
    entry of config.exclude_paths with fnmatch.
    '''
    relative = str(file_path.relative_to(config.base_dir))
    return any(
        fnmatch.fnmatch(relative, pattern)
        for pattern in config.exclude_paths)
def check_paths(config, git_paths):
    '''Check every git-tracked file under git_paths for the licence.

    config supplies base_dir, licence, match_regex and exclude_paths.
    Returns the ResultsTracker holding the outcome for every file. Exits the
    program with the error text if a matcher cannot be built from the
    configured licence.
    '''
    results = ResultsTracker(config.base_dir)

    try:
        all_matchers = {
            key: LicenceMatcher(style, config.licence, config.match_regex)
            for key, style in COMMENT_STYLES.items()
        }
    except RuntimeError as e:
        # Raise SystemExit directly: the exit() builtin is only injected by
        # the site module and is not guaranteed to exist in every run.
        raise SystemExit(e)

    for filepath in git_find_all_file_paths(config.base_dir, git_paths):
        # Skip symlinks (with message)
        if filepath.is_symlink():
            results.excluded(filepath, "File is a symlink")
            continue

        # Skip non-file
        if not filepath.is_file():
            continue

        # Skip exclude patterns
        if matches_exclude_pattern(config, filepath):
            results.excluded(filepath, "Path matches exclude pattern")
            continue

        check_file_for_licence(all_matchers, results, filepath)

    return results
def check_file_for_licence(all_matchers, results, filepath):
    '''Check one file and record exactly one outcome for it in results.

    The file is skipped if no comment style is known for its name or if it
    is empty. Otherwise every acceptable comment style is tried in turn; the
    file passes as soon as one of them finds the licence and fails if none
    does.
    '''
    matchers = detect_comment_char(all_matchers, filepath.name)
    if not matchers:
        results.skipped(filepath, "Unknown comment style")
        return

    if filepath.stat().st_size == 0:
        results.skipped(filepath, "Empty file")
        return

    problems = []
    for matcher in matchers:
        good, line_num, msg = check_file_with_matcher(matcher, filepath)
        if good:
            results.passed(filepath, line_num, msg)
            return
        problems.append((line_num, msg))

    # If we get here, we didn't find a matching licence. Report the file as
    # failed once, however many comment styles were tried: results.failed
    # bumps the failure count on every call, so calling it once per style
    # would count a single file several times.
    distinct = list(dict.fromkeys(problems))
    first_line_num, first_msg = distinct[0]
    if len(distinct) == 1:
        reason = first_msg
    else:
        reason = "; ".join(
            "line {}: {}".format(line_num, msg) for line_num, msg in distinct)
    results.failed(filepath, first_line_num, reason)
def check_file_with_matcher(matcher, filepath):
    '''Check the file at filepath against matcher.

    Returns a tuple (is_good, line_number, msg). is_good is True on success;
    False on failure. line_number is the position where the licence was found
    (on success) or where we gave up searching for it (on failure). msg is
    the associated success or error message.
    '''
    hit_eof = "Reached end of file before finding licence"

    with filepath.open() as handle:
        # Lazily pair each right-stripped line with its 1-based line number.
        numbered = ((text.rstrip(), number)
                    for number, text in enumerate(handle, start=1))

        entry = next(numbered, None)
        if entry is None:
            return (False, 1, "Empty file")
        line, line_no = entry

        # The first line must either start the licence or be a comment in
        # the (possibly different) first-line style; in the latter case move
        # on to the second line.
        if not matcher.looks_like_first_line(line):
            if not matcher.looks_like_first_line_comment(line):
                return (False, line_no, "File does not start with comment")
            entry = next(numbered, None)
            if entry is None:
                return (False, line_no, hit_eof)
            line, line_no = entry

        # Walk down the first comment until the licence marker shows up.
        while not matcher.looks_like_first_line(line):
            entry = next(numbered, None)
            if entry is None:
                return (False, line_no, hit_eof)
            line, line_no = entry
            if not matcher.looks_like_comment(line):
                return (False, line_no,
                        "First comment ended before licence notice")

        # The marker matched, so this is taken to be the first licence line.
        # Now it has to match the expected first line in full.
        licence_assumed_start = line_no
        matcher.start()
        matched, done = matcher.take_line(line)
        if not matched:
            return (False, line_no, "Licence does not match")

        # Every following line must match the next expected licence line.
        while not done:
            entry = next(numbered, None)
            if entry is None:
                return (False, line_no, hit_eof)
            line, line_no = entry
            matched, done = matcher.take_line(line)
            if not matched:
                return (False, line_no, "Licence did not match")

    return (True, licence_assumed_start, "Licence found")
def main():
    '''Command-line entry point: check files and exit with 0 or 1.

    Reads the licence configuration from the HJSON file given by --config,
    checks the requested paths, prints a summary table and the failing
    paths, and exits with status 1 if any file failed (0 otherwise).
    '''
    desc = "A tool to check the lowRISC licence header is in each source file"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("--config",
                        metavar="config.hjson",
                        type=argparse.FileType('r', encoding='UTF-8'),
                        required=True,
                        help="HJSON file to read for licence configuration.")
    parser.add_argument("paths",
                        metavar="path",
                        nargs='*',
                        default=["."],
                        help="Paths to check for licence headers.")
    parser.add_argument('-v',
                        "--verbose",
                        action='store_true',
                        dest='verbose',
                        help="Verbose output")
    options = parser.parse_args()

    if options.verbose:
        logging.basicConfig(format="%(levelname)s: %(message)s",
                            level=logging.INFO)
    else:
        logging.basicConfig(format="%(levelname)s: %(message)s")

    config = SimpleNamespace()
    config.base_dir = git_find_repo_toplevel()

    # argparse opened the config file for us; close it once it is parsed.
    with options.config as config_file:
        parsed_config = hjson.load(config_file)

    config.licence = LicenceHeader(parsed_config['licence'])
    config.exclude_paths = set(parsed_config['exclude_paths'])

    # Check whether we should use regex matching or full string matching.
    # An unquoted true/false in the HJSON file is parsed as a boolean, so
    # accept booleans as well as the strings "true" and "false".
    match_regex = parsed_config.get('match_regex', 'false')
    if isinstance(match_regex, bool):
        config.match_regex = match_regex
    elif match_regex in ('true', 'false'):
        config.match_regex = match_regex == 'true'
    else:
        print('Invalid value for match_regex: {!r}. '
              'Should be "true" or "false".'.format(match_regex))
        # SystemExit is raised directly because the exit() builtin is only
        # injected by the site module.
        raise SystemExit(1)

    results = check_paths(config, options.paths)
    print(results.display_nicely())

    if results.any_failed():
        print("Failed:")
        # failing_paths is a set; sort it so the report order is stable.
        for path in sorted(results.failing_paths):
            print(" {}".format(str(path)))
        print("")
        raise SystemExit(1)

    raise SystemExit(0)


if __name__ == '__main__':
    main()