# blob: 70d6d17a2545bbec1d3726e5acbe35d897cc622e [file] [log] [blame]
#!/usr/bin/env python3
#
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
import argparse
import fnmatch
import logging
import subprocess
from itertools import groupby
from pathlib import Path
from types import SimpleNamespace
import hjson
from tabulate import tabulate
class LicenceHeader(object):
    """Represents the licence header we want to insert.

    The raw licence text is stripped of surrounding whitespace and stored as
    a list of lines. Instances can be indexed and passed to `len()`, both of
    which operate on that list of lines.
    """

    def __init__(self, text):
        """Split the raw licence `text` into its individual lines."""
        self._lines = text.strip().splitlines()

    def __getitem__(self, idx):
        """Return the licence line (or slice of lines) selected by `idx`."""
        return self._lines[idx]

    def __len__(self):
        """Return the number of lines in the licence header."""
        return len(self._lines)

    def numbered_lines(self, skip=0):
        """Returns an iterator of (line_no, line_text).

        `line_no` counts from 1, and is for humans to count line numbers with.
        Use `skip` to skip enumerating the first few lines; numbering still
        reflects the position within the whole header.
        """
        return enumerate(self._lines[skip:], start=1 + skip)

    @property
    def first_word(self):
        """The first space-delimited word of the first licence line.

        Raises IndexError if the licence text contained no lines at all.
        """
        # Take element 0 of the split rather than unpacking into two names:
        # unpacking fails when the first line consists of a single word.
        return self._lines[0].split(' ', 1)[0]
class LineCommentStyle:
    """Helpers for line-style comments.

    Every comment line is the comment prefix, a single space, and then the
    text. The first line of a file is expected to use the same prefix.
    """

    def __init__(self, prefix):
        """Remember `prefix` (converted to a string) as the comment marker."""
        marker = str(prefix)
        self.comment_prefix = marker
        self.first_line_prefix = marker

    def search_line(self, licence_first_word):
        """Return the text that the first licence line must start with."""
        return ' '.join((self.comment_prefix, licence_first_word))

    def expected_full_line(self, licence_line):
        """Return `licence_line` exactly as it should appear when commented."""
        return ' '.join((self.comment_prefix, licence_line))
class DifferentFirstLineCommentStyle(LineCommentStyle):
    """Line-comment style whose first file line may use another prefix.

    Some files have a different allowable prefix for their first line; all
    other behaviour is inherited from `LineCommentStyle`.
    """

    def __init__(self, prefix, first_line_prefix):
        """Set up the normal `prefix`, then override the first-line prefix."""
        super().__init__(prefix)
        # The base initialiser sets this to the comment prefix; replace it.
        self.first_line_prefix = first_line_prefix
class BlockCommentStyle:
    """Helpers for block-style comments.

    Each licence line is expected to sit in its own block comment: opening
    marker, space, text, space, closing marker.
    """

    def __init__(self, prefix, suffix):
        """Remember the opening `prefix` and closing `suffix` as strings."""
        opener = str(prefix)
        self.comment_prefix = opener
        self.comment_suffix = str(suffix)
        self.first_line_prefix = opener

    def search_line(self, licence_first_word):
        """Return the text that the first licence line must start with."""
        return ' '.join((self.comment_prefix, licence_first_word))

    def expected_full_line(self, licence_line):
        """Return `licence_line` wrapped in the opening and closing markers."""
        pieces = (self.comment_prefix, licence_line, self.comment_suffix)
        return ' '.join(pieces)
# Shared comment-style instances referenced by the COMMENT_CHARS table.
SLASH_SLASH = LineCommentStyle(prefix="//")  # lines look like `// text`
HASH = LineCommentStyle(prefix="#")  # lines look like `# text`
SLASH_STAR = BlockCommentStyle(prefix="/*", suffix="*/")  # `/* text */`
# (Prioritised) Mapping of file name suffixes to CommentStyle object.
# If the suffix of your file does not match one of these, it will not be
# checked.
#
# These rules are given in priority order. Tuples of (extensions, style) higher
# in the list are matched before those later in the list, on purpose.
#
# Files that do not match any extension, or which have a style of `None` are
# not checked for a licence.
COMMENT_CHARS = [
# Each entry is a tuple of (list of file name suffixes, comment style).
# Suffixes are compared against the end of the file name, so an entry such as
# "Makefile" or "meson.build" is matched the same way as an extension.
# A style of `None` marks files that are deliberately left unchecked.
# Hardware Files
([".svh", ".sv", ".sv.tpl"], SLASH_SLASH), # SystemVerilog
# Hardware Build Systems
([".tcl", ".sdc"], HASH), # tcl
# FuseSoC core files may start with a "CAPI=2" line instead of a comment.
([".core", ".core.tpl"],
DifferentFirstLineCommentStyle("#", "CAPI=2")), # FuseSoC Core Files
(["Makefile", ".mk"], HASH), # Makefiles
([".ys"], HASH), # Yosys script
([".waiver"], HASH), # AscentLint waiver files
([".vlt"], SLASH_SLASH), # Verilator configuration (waiver) files
([".vbl"], HASH), # Verible configuration files
([".el", ".el.tpl"], SLASH_SLASH), # Exclusion list
([".f"], None), # File lists
# The following two rules will inevitably bite us.
# "riviera_run.do" has to stay above ".do": both suffixes match that file
# name, and detect_comment_char() returns the first entry that matches.
(["riviera_run.do"], HASH), # Riviera dofile
([".do"], SLASH_SLASH), # Cadence LEC dofile
# Software Files
([".c", ".c.tpl", ".h", ".h.tpl", ".cc", ".cpp"], SLASH_SLASH), # C, C++
([".S"], SLASH_SLASH), # Assembly
([".ld", ".ld.tpl"], SLASH_STAR), # Linker Scripts
([".rs"], SLASH_SLASH), # Rust
# Software Build Systems
(["meson.build", "toolchain.txt", "meson_options.txt"], HASH), # Meson
# General Tooling
([".py"], HASH), # Python
([".sh"], HASH), # Shell Scripts
(["Dockerfile"], HASH), # Dockerfiles
# Configuration
([".hjson", ".hjson.tpl"], SLASH_SLASH), # hjson
([".yml", ".yaml"], HASH), # YAML
([".toml"], HASH), # TOML
(["-requirements.txt"], HASH), # Apt and Python requirements files
(["redirector.conf"], HASH), # nginx config
# Documentation
([".md", ".md.tpl", ".html"], None), # Markdown and HTML
([".css"], SLASH_STAR), # CSS
([".scss"], SLASH_SLASH), # SCSS
# Templates (Last because there are overlaps with extensions above)
# More specific "*.tpl" suffixes (e.g. ".sv.tpl") are listed earlier, so this
# entry only catches template files that no earlier entry matched.
([".tpl"], HASH), # Mako templates
]
def detect_comment_char(filename):
    """Return the comment style to use for `filename`.

    The entries of COMMENT_CHARS are tried in order and the style of the
    first entry with a suffix that `filename` ends with is returned. The
    result is `None` when no entry matches, or when the matching entry has
    no style.
    """
    for candidate_suffixes, style in COMMENT_CHARS:
        # str.endswith accepts a tuple and is true if any element matches.
        if filename.endswith(tuple(candidate_suffixes)):
            return style
    return None
def git_find_repo_toplevel():
    """Return the resolved path printed by `git rev-parse --show-toplevel`.

    The command runs in the current working directory;
    `subprocess.CalledProcessError` propagates if git exits with an error.
    """
    command = ['git', 'rev-parse', '--show-toplevel']
    raw_output = subprocess.check_output(command)
    toplevel = raw_output.decode().strip()
    return Path(toplevel).resolve()
def git_find_all_file_paths(top_level, search_paths):
    """Yield a path for every file `git ls-files` reports.

    `git ls-files -z` is run inside `top_level`, restricted to
    `search_paths`. Each reported name is joined onto `top_level` and
    yielded as a `Path`.

    `subprocess.CalledProcessError` propagates if git exits with an error.
    """
    git_output = subprocess.check_output(
        ["git", "-C",
         str(top_level), "ls-files", "-z", "--", *search_paths])
    # With `-z` the names are separated by NUL bytes. Splitting leaves an
    # empty chunk after the final NUL, and a single empty chunk when git
    # printed nothing at all; skip those so that `top_level` itself is never
    # yielded as if it were a tracked file.
    for raw_path in git_output.split(b"\0"):
        if not raw_path:
            continue
        yield Path(top_level, raw_path.decode())
class ResultsTracker(object):
    """Helper for tracking results.

    Counts how many files passed, failed, were skipped or were excluded, and
    remembers the (base-relative) paths of the files that failed. Each
    outcome is also reported through the `logging` module.
    """

    def __init__(self, base_dir):
        """Create an empty tracker; paths are reported relative to `base_dir`."""
        self.base_dir = base_dir
        # All state is per instance. In particular the set of failing paths
        # must not live on the class, where every tracker would share (and
        # mutate) the same set object.
        self.passed_count = 0
        self.failed_count = 0
        self.excluded_count = 0
        self.skipped_count = 0
        self.failing_paths = set()

    @property
    def total_count(self):
        """Total number of files recorded, over all four outcomes."""
        return (self.passed_count + self.failed_count + self.skipped_count +
                self.excluded_count)

    def passed(self, path, line_no, reason):
        """Record that `path` passed; logged at debug level."""
        rel_path = path.relative_to(self.base_dir)
        logging.debug("%s:%d PASSED: %s", str(rel_path), line_no, reason)
        self.passed_count += 1

    def failed(self, path, line_no, reason):
        """Record that `path` failed at `line_no`; logged at error level."""
        rel_path = path.relative_to(self.base_dir)
        logging.error("%s:%d FAILED: %s", str(rel_path), line_no, reason)
        self.failing_paths.add(rel_path)
        self.failed_count += 1

    def skipped(self, path, reason):
        """Record that `path` was skipped; logged at info level."""
        rel_path = path.relative_to(self.base_dir)
        logging.info("%s: SKIPPED: %s", str(rel_path), reason)
        self.skipped_count += 1

    def excluded(self, path, reason):
        """Record that `path` was excluded; logged at debug level."""
        rel_path = path.relative_to(self.base_dir)
        logging.debug("%s: EXCLUDED: %s", str(rel_path), reason)
        self.excluded_count += 1

    def any_failed(self):
        """Return True if at least one failure has been recorded."""
        return self.failed_count > 0

    def display_nicely(self):
        """Return the counts formatted as a table by `tabulate`."""
        headers = ["Results:", "Files"]
        results = [["Passed", self.passed_count],
                   ["Failed", self.failed_count],
                   ["Skipped", self.skipped_count],
                   ["Excluded", self.excluded_count],
                   ["Total", self.total_count]]
        return tabulate(results, headers, tablefmt="simple")
def matches_exclude_pattern(config, file_path):
    """Return True if `file_path` matches any configured exclude pattern.

    The path is made relative to `config.base_dir` and compared, as a
    string, against each fnmatch-style pattern in `config.exclude_paths`.
    """
    relative = str(file_path.relative_to(config.base_dir))
    return any(
        fnmatch.fnmatch(relative, pattern)
        for pattern in config.exclude_paths)
def check_paths(config, git_paths):
    """Check every git-tracked file under `git_paths` and return the results.

    Symlinks and paths matching an exclude pattern are recorded as excluded,
    anything else that is not a regular file is ignored without being
    recorded, and the remaining files are checked for the licence header.
    """
    tracker = ResultsTracker(config.base_dir)

    for candidate in git_find_all_file_paths(config.base_dir, git_paths):
        if candidate.is_symlink():
            # Symlinks are not followed, but they are reported.
            tracker.excluded(candidate, "File is a symlink")
        elif not candidate.is_file():
            # Not a regular file: nothing to check and nothing to count.
            pass
        elif matches_exclude_pattern(config, candidate):
            tracker.excluded(candidate, "Path matches exclude pattern")
        else:
            check_file_for_licence(config.licence, tracker, candidate)

    return tracker
def check_file_for_licence(licence, results, filepath):
    """Check that `filepath` carries the `licence` header near its top.

    The outcome is recorded on `results` (via `skipped`, `failed` or
    `passed`); nothing is returned.

    Files with no known comment style, and files of size zero, are skipped.
    Otherwise the first line must either begin the licence or start with the
    comment style's first-line prefix. Following lines are then scanned for
    the start of the licence, and once it is found every licence line must
    match the file exactly (ignoring trailing whitespace in the file).
    """
    comment_style = detect_comment_char(filepath.name)
    if comment_style is None:
        results.skipped(filepath, "Unknown comment style")
        return

    if filepath.stat().st_size == 0:
        results.skipped(filepath, "Empty file")
        return

    def next_line(file, line_no):
        """Read one line; return it without trailing whitespace, and its number."""
        return (next(file).rstrip(), line_no + 1)

    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, and substitute undecodable bytes rather than raising: such a
    # line simply fails to compare equal to the expected licence text.
    with filepath.open(encoding="utf-8", errors="replace") as f:
        licence_assumed_start = None

        # Get first line
        try:
            line, line_no = next_line(f, 0)
        except StopIteration:
            results.failed(filepath, 1, "Empty file")
            return

        licence_search_marker = comment_style.search_line(licence.first_word)

        # Check first line against the first word of licence, or against a
        # possible different first line.
        if not line.startswith(licence_search_marker):
            if not line.startswith(comment_style.first_line_prefix):
                results.failed(filepath, line_no,
                               "File does not start with comment")
                return

            # NOTE(review): the line read here is not itself tested against
            # comment_prefix; only lines read inside the loop below are.
            try:
                line, line_no = next_line(f, line_no)
            except StopIteration:
                results.failed(filepath, line_no,
                               "Reached end of file before finding licence")
                return

        # Skip lines that don't seem to be the first line of the licence
        while not line.startswith(licence_search_marker):
            try:
                line, line_no = next_line(f, line_no)
            except StopIteration:
                results.failed(filepath, line_no,
                               "Reached end of file before finding licence")
                return

            if not line.startswith(comment_style.comment_prefix):
                results.failed(filepath, line_no,
                               "First comment ended before licence notice")
                return

        # We found the marker, so we found the first line of the licence.
        # The current line is in the first comment, so check the line matches
        # the expected first line:
        licence_assumed_start = line_no
        if line != comment_style.expected_full_line(licence[0]):
            results.failed(filepath, line_no, "Licence does not match")
            return

        # The remaining licence lines must follow on consecutive file lines.
        for _, licence_line in licence.numbered_lines(skip=1):
            try:
                line, line_no = next_line(f, line_no)
            except StopIteration:
                results.failed(filepath, line_no,
                               "Reached end of file before finding licence")
                return

            # Check against full expected line.
            if line != comment_style.expected_full_line(licence_line):
                results.failed(filepath, line_no, "Licence did not match")
                return

    results.passed(filepath, licence_assumed_start, "Licence found")
def main():
    """Command-line entry point.

    Parses the arguments, loads the licence configuration from the HJSON
    file given by `--config`, checks the requested paths and prints a summary
    table. Exits with status 1 if any file failed the check, 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description=
        "A tool to check the lowRISC licence header is in each source file")
    # The configuration supplies the licence text, so it cannot be omitted:
    # without it there would be nothing to load below.
    parser.add_argument("--config",
                        metavar="config.hjson",
                        type=argparse.FileType('r', encoding='UTF-8'),
                        required=True,
                        help="HJSON file to read for licence configuration.")
    parser.add_argument("paths",
                        metavar="path",
                        nargs='*',
                        default=["."],
                        help="Paths to check for licence headers.")
    parser.add_argument('-v',
                        "--verbose",
                        action='store_true',
                        dest='verbose',
                        help="Verbose output")

    options = parser.parse_args()

    if options.verbose:
        logging.basicConfig(format="%(levelname)s: %(message)s",
                            level=logging.INFO)
    else:
        logging.basicConfig(format="%(levelname)s: %(message)s")

    config = SimpleNamespace()
    config.base_dir = git_find_repo_toplevel()

    # argparse opened the file for us; close it once it has been parsed.
    with options.config as config_file:
        parsed_config = hjson.load(config_file)

    config.licence = LicenceHeader(parsed_config['licence'])
    # A configuration without exclude patterns simply excludes nothing.
    config.exclude_paths = set(parsed_config.get('exclude_paths', []))

    results = check_paths(config, options.paths)

    print(results.display_nicely())

    if results.any_failed():
        print("Failed:")
        # failing_paths is a set; sort it so the listing is reproducible.
        for path in sorted(results.failing_paths):
            print(" {}".format(str(path)))
        print("")
        # Raise SystemExit directly: the bare `exit()` helper is installed by
        # the `site` module and is not guaranteed to exist.
        raise SystemExit(1)

    raise SystemExit(0)
# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()