util/lowrisc_misc-linters/licence-checker/licence-checker.py - 3p/lowrisc/opentitan - Git at Google

 #!/usr/bin/env python3
 #
 # Copyright lowRISC contributors.
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0

 import argparse
 import fnmatch
 import logging
 import re
 import subprocess
 from pathlib import Path
 from types import SimpleNamespace

 import hjson
 from tabulate import tabulate


 class LicenceHeader(object):
     """Represents the licence header we want to insert.

     The text is stripped of surrounding whitespace and split into lines.
     Instances can be indexed, measured with len() and iterated over to get
     at those lines.
     """
     def __init__(self, text):
         self._lines = text.strip().splitlines()

     def __getitem__(self, idx):
         return self._lines[idx]

     def __len__(self):
         return len(self._lines)

     def numbered_lines(self, skip=0):
         """Returns an iterator of (line_no, line_text).

         `line_no` counts from 1, and is for humans to count line numbers with.
         Use `skip` to skip enumerating the first few lines.
         """
         return enumerate(self._lines[skip:], start=1 + skip)

     @property
     def first_word(self):
         """The first space-delimited word of the first licence line.

         A first line that contains no space is returned whole, and an empty
         licence gives an empty string.
         """
         if not self._lines:
             return ''
         # Index the split result instead of unpacking it into two names:
         # unpacking fails when the line holds a single word.
         return self._lines[0].split(' ', 1)[0]


 class CommentStyle:
     '''Common behaviour shared by all comment style objects.

     Stores the prefix used for the first line of a file and the prefix
     used for ordinary comment lines, and builds the regular expressions
     for commented licence lines.
     '''
     def __init__(self, first_line_prefix, comment_prefix):
         self.comment_prefix = comment_prefix
         self.first_line_prefix = first_line_prefix

     def search_line_pattern(self, licence_first_word):
         '''Compile a literal pattern: comment prefix, a space, the word.'''
         marker = self.comment_prefix + ' ' + licence_first_word
         return re.compile(re.escape(marker))

     def full_line_parts(self, licence_line):
         '''List the regex fragments making up one commented licence line.

         Only the comment prefix is escaped here; licence_line is used as
         given.
         '''
         escaped_prefix = re.escape(self.comment_prefix)
         return [escaped_prefix, licence_line]

     def full_line_pattern(self, licence_line):
         '''Returns a regex pattern which matches one line of licence text.'''
         fragments = self.full_line_parts(licence_line)
         return re.compile(' '.join(fragments))


 class LineCommentStyle(CommentStyle):
     """Comment style in which every line opens with the same prefix."""
     def __init__(self, prefix):
         # The first line of the file uses the same prefix as every other
         # comment line.
         super().__init__(first_line_prefix=prefix, comment_prefix=prefix)


 class DifferentFirstLineCommentStyle(CommentStyle):
     """Comment style whose first line may start with a prefix of its own."""
     def __init__(self, first_line_prefix, prefix):
         super().__init__(first_line_prefix=first_line_prefix,
                          comment_prefix=prefix)


 class BlockCommentStyle(CommentStyle):
     """Comment style that wraps each line in a prefix and a suffix."""
     def __init__(self, prefix, suffix):
         super().__init__(first_line_prefix=prefix, comment_prefix=prefix)
         self.comment_suffix = str(suffix)

     def full_line_parts(self, licence_line):
         """Return the escaped prefix, the licence line, the escaped suffix."""
         opening = re.escape(self.comment_prefix)
         closing = re.escape(self.comment_suffix)
         return [opening, licence_line, closing]


 # Keys of COMMENT_STYLES, each spelled like the comment opener it stands for.
 SLASH_SLASH = '//'
 HASH = '#'
 SLASH_STAR = '/*'

 # Maps each style key to the object describing that comment syntax.
 # 'corefile' uses '#' comments but takes "CAPI=2" as its first-line prefix.
 COMMENT_STYLES = {
     SLASH_SLASH: LineCommentStyle("//"),
     HASH: LineCommentStyle("#"),
     SLASH_STAR: BlockCommentStyle("/*", "*/"),
     'corefile': DifferentFirstLineCommentStyle("CAPI=2", "#")
 }

 # (Prioritised) Mapping of file name suffixes to comment style. If the suffix
 # of your file does not match one of these, it will not be checked.
 #
 # Each entry is a pair (suffixes, styles). suffixes is a list of file name
 # suffixes: if a filename ends with one of them (an exact, case-sensitive
 # str.endswith comparison, so whole names such as "Makefile" work too), we'll
 # use the styles in styles. styles is either a single string or a list of
 # strings. Every string must be a key of COMMENT_STYLES; together they give
 # the different comment styles that are acceptable for the file type.
 #
 # These rules are given in priority order: the first entry with a matching
 # suffix wins, which is why more specific suffixes are listed before more
 # general ones.
 #
 # Files that either do not match any suffix or that have an empty list of
 # styles are not checked for a licence.
 COMMENT_CHARS = [
     # Hardware Files
     ([".svh", ".sv", ".sv.tpl"], SLASH_SLASH),  # SystemVerilog

     # Hardware Build Systems
     ([".tcl", ".sdc"], HASH),  # tcl
     ([".core", ".core.tpl"], 'corefile'),  # FuseSoC Core Files
     (["Makefile", ".mk"], HASH),  # Makefiles
     ([".ys"], HASH),  # Yosys script
     ([".waiver"], HASH),  # AscentLint waiver files
     ([".vlt"], SLASH_SLASH),  # Verilator configuration (waiver) files
     ([".vbl"], HASH),  # Verible configuration files
     ([".el", ".el.tpl"], SLASH_SLASH),  # Exclusion list
     ([".cfg", ".cfg.tpl"], [SLASH_SLASH,
                             HASH]),  # Kinds of configuration files
     ([".f"], []),  # File lists (not checked)

     # The following two rules will inevitably bite us.
     (["riviera_run.do"], HASH),  # Riviera dofile
     ([".do"], SLASH_SLASH),  # Cadence LEC dofile

     # Software Files
     ([".c", ".c.tpl", ".h", ".h.tpl", ".cc", ".cpp"], SLASH_SLASH),  # C, C++
     ([".def"], SLASH_SLASH),  # C, C++ X-Include List Declaration Files
     ([".S"], [SLASH_SLASH, SLASH_STAR]),  # Assembly (With Preprocessing)
     ([".s"], SLASH_STAR),  # Assembly (Without Preprocessing)
     ([".ld", ".ld.tpl"], SLASH_STAR),  # Linker Scripts
     ([".rs", ".rs.tpl"], SLASH_SLASH),  # Rust

     # Software Build Systems
     (["meson.build", "toolchain.txt", "meson_options.txt"], HASH),  # Meson

     # General Tooling
     ([".py"], HASH),  # Python
     ([".sh"], HASH),  # Shell Scripts
     (["Dockerfile"], HASH),  # Dockerfiles

     # Configuration
     ([".hjson", ".hjson.tpl"], SLASH_SLASH),  # hjson
     ([".yml", ".yaml"], HASH),  # YAML
     ([".toml"], HASH),  # TOML
     (["-requirements.txt"], HASH),  # Apt and Python requirements files
     (["redirector.conf"], HASH),  # nginx config

     # Documentation
     ([".md", ".md.tpl", ".html"], []),  # Markdown and HTML (not checked)
     ([".css"], SLASH_STAR),  # CSS
     ([".scss"], SLASH_SLASH),  # SCSS

     # Templates (Last because there are overlaps with extensions above)
     ([".tpl"], HASH),  # Mako templates
 ]


 class LicenceMatcher:
     '''An object to match a given licence at the start of a file.

     comment_style is a CommentStyle-like object giving the comment syntax,
     licence is the sequence of licence lines (it must also provide
     first_word when match_regex is false) and match_regex says whether the
     licence lines are regular expressions (true) or literal text (false).

     Raises RuntimeError if match_regex is true and a licence line can't be
     compiled as a regular expression.
     '''
     def __init__(self, comment_style, licence, match_regex):
         self.style = comment_style
         self.expected_lines = list()
         if match_regex:
             # With regex matching the licence lines are used "as is".
             # Count from 1 so an error names the line a human would.
             for i, ll in enumerate(licence, start=1):
                 try:
                     self.expected_lines.append(
                         comment_style.full_line_pattern(ll))
                 # Catch any regex error here and raise a runtime error.
                 except re.error as e:
                     raise RuntimeError(
                         "Can't compile line {} of the licence as a regular expression. Saw `{}`: {}"
                         .format(i, self._offending_text(e, ll),
                                 e.msg)) from e
             # use the "first line" as a licence marker
             self.search_marker = self.expected_lines[0]
         else:
             # For non-regex matching we need to escape everything.
             # This can never throw an exception as everything has been
             # escaped and therefore is always a legal regex.
             self.search_marker = comment_style.search_line_pattern(
                 licence.first_word)
             self.expected_lines = [
                 comment_style.full_line_pattern(re.escape(ll))
                 for ll in licence
             ]

         self.lines_left = []

     @staticmethod
     def _offending_text(error, licence_line):
         '''Return the text to quote when reporting a regex compile error.

         A re.error does not always carry a pattern and a position (both
         may be None), so fall back to the whole licence line rather than
         failing while building the error message.
         '''
         pattern, pos = error.pattern, error.pos
         if pattern is None or pos is None or not 0 <= pos < len(pattern):
             return licence_line
         return pattern[pos]

     def looks_like_first_line_comment(self, line):
         '''True if line starts with the style's first-line prefix.'''
         return line.startswith(self.style.first_line_prefix)

     def looks_like_comment(self, line):
         '''True if line starts with the style's comment prefix.'''
         return line.startswith(self.style.comment_prefix)

     def looks_like_first_line(self, line):
         '''True if the start of line matches the licence search marker.'''
         return self.search_marker.match(line) is not None

     def start(self):
         '''Reset lines_left, to match at the start of the licence'''
         self.lines_left = self.expected_lines

     def take_line(self, line):
         '''Check whether line matches the next line of the licence.

         Returns a pair (matched, done). matched is true if the line matched. If
         this was the last line of the licence, done is true. On a match, this
         drops the matched line from lines_left, so the next call to take_line
         will match against the next line of the licence.

         '''
         # If we have no more lines to match, claim a match and that we're done.
         # This shouldn't happen in practice, except if the configuration has an
         # empty licence.
         if not self.lines_left:
             return (True, True)

         if self.lines_left[0].fullmatch(line) is None:
             return (False, False)

         # Slicing builds a new list, so expected_lines is left untouched.
         self.lines_left = self.lines_left[1:]
         return (True, not self.lines_left)


 def detect_comment_char(all_matchers, filename):
     '''Look up the LicenceMatcher objects that apply to filename.

     all_matchers should be a dict like COMMENT_STYLES, but where the values are
     the corresponding LicenceMatcher objects. The first COMMENT_CHARS entry
     with a suffix that filename ends with decides the result; a filename
     matching no entry gives an empty list.

     '''
     styles = next(
         (keys for suffixes, keys in COMMENT_CHARS
          if any(filename.endswith(suffix) for suffix in suffixes)),
         None)

     if styles is None:
         return []

     # A bare string stands for a one-element list of styles.
     if not isinstance(styles, list):
         assert isinstance(styles, str)
         styles = [styles]

     return [all_matchers[key] for key in styles]


 def git_find_repo_toplevel():
     '''Ask git for the top-level directory of the current repository.

     Returns the resolved Path built from the output of
     `git rev-parse --show-toplevel`.
     '''
     command = ['git', 'rev-parse', '--show-toplevel']
     raw_output = subprocess.check_output(command)
     toplevel = raw_output.decode().strip()
     return Path(toplevel).resolve()


 def git_find_all_file_paths(top_level, search_paths):
     '''Yield a Path for every file git lists under search_paths.

     Runs `git ls-files -z` inside top_level and joins each NUL-separated
     entry of its output onto top_level. Nothing is yielded when git lists
     no files.
     '''
     git_output = subprocess.check_output(
         ["git", "-C",
          str(top_level), "ls-files", "-z", "--", *search_paths])
     for path in git_output.split(b"\0"):
         # Splitting leaves an empty entry after the final NUL, and a lone
         # empty entry when the output is empty. Skip those: joining one
         # onto top_level would yield top_level itself.
         if path:
             yield Path(top_level, path.decode())


 class ResultsTracker(object):
     """Helper for tracking results.

     Counts the files that passed, failed, were skipped or were excluded,
     and remembers the paths (relative to base_dir) of the failing files.
     """
     def __init__(self, base_dir):
         self.base_dir = base_dir

         self.passed_count = 0
         self.failed_count = 0
         self.excluded_count = 0
         self.skipped_count = 0

         # Created per instance: a set defined on the class would be shared
         # by every tracker.
         self.failing_paths = set()

     @property
     def total_count(self):
         """Sum of the passed, failed, skipped and excluded counts."""
         return (self.passed_count + self.failed_count + self.skipped_count +
                 self.excluded_count)

     def passed(self, path, line_no, reason):
         """Log (at debug level) and count a file that passed."""
         rel_path = path.relative_to(self.base_dir)
         logging.debug("%s:%d PASSED: %s", str(rel_path), line_no, reason)
         self.passed_count += 1

     def failed(self, path, line_no, reason):
         """Log (at error level), count and remember a file that failed."""
         rel_path = path.relative_to(self.base_dir)
         logging.error("%s:%d FAILED: %s", str(rel_path), line_no, reason)
         self.failing_paths.add(rel_path)
         self.failed_count += 1

     def skipped(self, path, reason):
         """Log (at info level) and count a file that was skipped."""
         rel_path = path.relative_to(self.base_dir)
         logging.info("%s: SKIPPED: %s", str(rel_path), reason)
         self.skipped_count += 1

     def excluded(self, path, reason):
         """Log (at debug level) and count a file that was excluded."""
         rel_path = path.relative_to(self.base_dir)
         logging.debug("%s: EXCLUDED: %s", str(rel_path), reason)
         self.excluded_count += 1

     def any_failed(self):
         """True if at least one failure has been recorded."""
         return self.failed_count > 0

     def display_nicely(self):
         """Return the counts formatted as a table by tabulate."""
         headers = ["Results:", "Files"]
         results = [["Passed", self.passed_count],
                    ["Failed", self.failed_count],
                    ["Skipped", self.skipped_count],
                    ["Excluded", self.excluded_count],
                    ["Total", self.total_count]]

         return tabulate(results, headers, tablefmt="simple")


 def matches_exclude_pattern(config, file_path):
     '''Tell whether file_path is covered by one of config.exclude_paths.

     The path is taken relative to config.base_dir and compared against
     each exclude pattern with fnmatch.
     '''
     candidate = str(file_path.relative_to(config.base_dir))
     return any(
         fnmatch.fnmatch(candidate, pattern)
         for pattern in config.exclude_paths)


 def check_paths(config, git_paths):
     '''Check every file git lists under git_paths for the licence header.

     config must provide base_dir, licence, match_regex and exclude_paths.
     Symlinks and paths matching an exclude pattern are recorded as
     excluded; other entries that are not regular files are ignored.

     Returns the ResultsTracker holding the outcome. Raises SystemExit
     (carrying the error) if the licence matchers can't be built.
     '''
     results = ResultsTracker(config.base_dir)
     try:
         all_matchers = {
             key: LicenceMatcher(style, config.licence, config.match_regex)
             for key, style in COMMENT_STYLES.items()
         }
     except RuntimeError as e:
         # Raise SystemExit directly: the `exit` helper is added by the site
         # module for interactive use and is not meant for programs.
         raise SystemExit(e)

     for filepath in git_find_all_file_paths(config.base_dir, git_paths):
         # Skip symlinks (with message)
         if filepath.is_symlink():
             results.excluded(filepath, "File is a symlink")
             continue

         # Skip non-file
         if not filepath.is_file():
             continue

         # Skip exclude patterns
         if matches_exclude_pattern(config, filepath):
             results.excluded(filepath, "Path matches exclude pattern")
             continue

         check_file_for_licence(all_matchers, results, filepath)

     return results


 def check_file_for_licence(all_matchers, results, filepath):
     '''Check one file for the licence and record the outcome in results.

     The file is skipped if no comment style is known for its name or if
     it is empty. Otherwise each acceptable comment style is tried in
     turn: the first one that matches records a pass, and if none matches
     a single failure is recorded for the file.
     '''
     matchers = detect_comment_char(all_matchers, filepath.name)

     if not matchers:
         results.skipped(filepath, "Unknown comment style")
         return

     if filepath.stat().st_size == 0:
         results.skipped(filepath, "Empty file")
         return

     problems = []
     for matcher in matchers:
         good, line_num, msg = check_file_with_matcher(matcher, filepath)
         if good:
             results.passed(filepath, line_num, msg)
             return
         problems.append((line_num, msg))

     # If we get here, we didn't find a matching licence. Record exactly one
     # failure so the per-file counts stay right even when several comment
     # styles were tried.
     first_line_num, first_msg = problems[0]
     if len(problems) == 1:
         results.failed(filepath, first_line_num, first_msg)
         return

     reason = "; ".join("{} (line {})".format(msg, line_num)
                        for line_num, msg in problems)
     results.failed(filepath, first_line_num, reason)


 def check_file_with_matcher(matcher, filepath):
     '''Look for the licence in the file at filepath using matcher.

     Returns a tuple (is_good, line_number, msg). is_good is True when the
     licence was found and False otherwise. line_number is where the
     licence starts (on success) or where the search stopped (on failure).
     msg describes the outcome.

     '''
     eof_msg = "Reached end of file before finding licence"

     with filepath.open() as handle:
         # Lazily yields each line with trailing whitespace removed. A
         # stripped line is never None, so None can mark the end of file.
         lines = (raw.rstrip() for raw in handle)

         line = next(lines, None)
         if line is None:
             return (False, 1, "Empty file")
         line_no = 1

         # The first line has to either open the licence or look like a
         # first-line comment; in the latter case move on to line two.
         if not matcher.looks_like_first_line(line):
             if not matcher.looks_like_first_line_comment(line):
                 return (False, line_no, "File does not start with comment")

             line = next(lines, None)
             if line is None:
                 return (False, line_no, eof_msg)
             line_no += 1

         # Walk down the comment until a line looks like the licence start.
         while not matcher.looks_like_first_line(line):
             line = next(lines, None)
             if line is None:
                 return (False, line_no, eof_msg)
             line_no += 1

             if not matcher.looks_like_comment(line):
                 return (False, line_no,
                         "First comment ended before licence notice")

         # The marker matched, so this should be the first licence line.
         # Check it in full, then every following licence line.
         start_line = line_no
         matcher.start()
         matched, done = matcher.take_line(line)
         if not matched:
             return (False, line_no, "Licence does not match")

         while not done:
             line = next(lines, None)
             if line is None:
                 return (False, line_no, eof_msg)
             line_no += 1

             matched, done = matcher.take_line(line)
             if not matched:
                 return (False, line_no, "Licence did not match")

     return (True, start_line, "Licence found")


 def main():
     '''Command-line entry point: parse arguments, run the check and exit.

     Reads the licence, the exclude patterns and the optional match_regex
     setting from the HJSON file given by --config, checks the requested
     paths and prints a summary. Exits with status 1 if match_regex is
     invalid or any file failed the check, and with status 0 otherwise.
     '''
     desc = "A tool to check the lowRISC licence header is in each source file"
     parser = argparse.ArgumentParser(description=desc)
     parser.add_argument("--config",
                         metavar="config.hjson",
                         type=argparse.FileType('r', encoding='UTF-8'),
                         required=True,
                         help="HJSON file to read for licence configuration.")
     parser.add_argument("paths",
                         metavar="path",
                         nargs='*',
                         default=["."],
                         help="Paths to check for licence headers.")
     parser.add_argument('-v',
                         "--verbose",
                         action='store_true',
                         dest='verbose',
                         help="Verbose output")

     options = parser.parse_args()

     if options.verbose:
         logging.basicConfig(format="%(levelname)s: %(message)s",
                             level=logging.INFO)
     else:
         logging.basicConfig(format="%(levelname)s: %(message)s")

     config = SimpleNamespace()
     config.base_dir = git_find_repo_toplevel()

     # argparse has already opened the file; close it once it is parsed.
     with options.config as config_file:
         parsed_config = hjson.load(config_file)

     config.licence = LicenceHeader(parsed_config['licence'])
     config.exclude_paths = set(parsed_config['exclude_paths'])
     # Check whether we should use regex matching or full string matching.
     match_regex = parsed_config.get('match_regex', 'false')
     # An unquoted true/false in the config is loaded as a boolean rather
     # than a string, so accept that spelling as well.
     if isinstance(match_regex, bool):
         match_regex = 'true' if match_regex else 'false'
     if match_regex not in ['true', 'false']:
         print('Invalid value for match_regex: {!r}. '
               'Should be "true" or "false".'.format(match_regex))
         raise SystemExit(1)
     config.match_regex = match_regex == 'true'

     results = check_paths(config, options.paths)

     print(results.display_nicely())

     if results.any_failed():
         print("Failed:")
         # Sort so the report does not depend on set iteration order.
         for path in sorted(results.failing_paths):
             print("  {}".format(str(path)))
         print("")
         raise SystemExit(1)

     raise SystemExit(0)

 # Run the checker only when this file is executed as a script.
 if __name__ == '__main__':
     main()
	#!/usr/bin/env python3
	#
	# Copyright lowRISC contributors.
	# Licensed under the Apache License, Version 2.0, see LICENSE for details.
	# SPDX-License-Identifier: Apache-2.0

	import argparse
	import fnmatch
	import logging
	import re
	import subprocess
	from pathlib import Path
	from types import SimpleNamespace

	import hjson
	from tabulate import tabulate


	class LicenceHeader(object):
	    """Represents the licence header we want to insert.

	    The text is stripped of surrounding whitespace and split into lines.
	    Instances can be indexed, measured with len() and iterated over to get
	    at those lines.
	    """
	    def __init__(self, text):
	        self._lines = text.strip().splitlines()

	    def __getitem__(self, idx):
	        return self._lines[idx]

	    def __len__(self):
	        return len(self._lines)

	    def numbered_lines(self, skip=0):
	        """Returns an iterator of (line_no, line_text).

	        `line_no` counts from 1, and is for humans to count line numbers with.
	        Use `skip` to skip enumerating the first few lines.
	        """
	        return enumerate(self._lines[skip:], start=1 + skip)

	    @property
	    def first_word(self):
	        """The first space-delimited word of the first licence line.

	        A first line that contains no space is returned whole, and an empty
	        licence gives an empty string.
	        """
	        if not self._lines:
	            return ''
	        # Index the split result instead of unpacking it into two names:
	        # unpacking fails when the line holds a single word.
	        return self._lines[0].split(' ', 1)[0]


	class CommentStyle:
	    '''Common behaviour shared by all comment style objects.

	    Stores the prefix used for the first line of a file and the prefix
	    used for ordinary comment lines, and builds the regular expressions
	    for commented licence lines.
	    '''
	    def __init__(self, first_line_prefix, comment_prefix):
	        self.first_line_prefix = first_line_prefix
	        self.comment_prefix = comment_prefix

	    def search_line_pattern(self, licence_first_word):
	        '''Compile a literal pattern: comment prefix, a space, the word.'''
	        marker = self.comment_prefix + ' ' + licence_first_word
	        return re.compile(re.escape(marker))

	    def full_line_parts(self, licence_line):
	        '''List the regex fragments making up one commented licence line.

	        Only the comment prefix is escaped here; licence_line is used as
	        given.
	        '''
	        return [re.escape(self.comment_prefix), licence_line]

	    def full_line_pattern(self, licence_line):
	        '''Returns a regex pattern which matches one line of licence text.'''
	        return re.compile(' '.join(self.full_line_parts(licence_line)))


	class LineCommentStyle(CommentStyle):
	    """Comment style in which every line opens with the same prefix."""
	    def __init__(self, prefix):
	        # The first line of the file uses the same prefix as every other
	        # comment line.
	        super().__init__(prefix, prefix)


	class DifferentFirstLineCommentStyle(CommentStyle):
	    """Comment style whose first line may start with a prefix of its own."""
	    def __init__(self, first_line_prefix, prefix):
	        super().__init__(first_line_prefix, prefix)


	class BlockCommentStyle(CommentStyle):
	    """Comment style that wraps each line in a prefix and a suffix."""
	    def __init__(self, prefix, suffix):
	        super().__init__(prefix, prefix)
	        self.comment_suffix = str(suffix)

	    def full_line_parts(self, licence_line):
	        """Return the escaped prefix, the licence line, the escaped suffix."""
	        return [
	            re.escape(self.comment_prefix), licence_line,
	            re.escape(self.comment_suffix)
	        ]


	# Keys of COMMENT_STYLES, each spelled like the comment opener it stands for.
	SLASH_SLASH = '//'
	HASH = '#'
	SLASH_STAR = '/*'

	# Maps each style key to the object describing that comment syntax.
	# 'corefile' uses '#' comments but takes "CAPI=2" as its first-line prefix.
	COMMENT_STYLES = {
	    SLASH_SLASH: LineCommentStyle("//"),
	    HASH: LineCommentStyle("#"),
	    # The block style opens with "/*" (matching SLASH_STAR) and closes
	    # with "*/".
	    SLASH_STAR: BlockCommentStyle("/*", "*/"),
	    'corefile': DifferentFirstLineCommentStyle("CAPI=2", "#")
	}

	# (Prioritised) Mapping of file name suffixes to comment style. If the suffix
	# of your file does not match one of these, it will not be checked.
	#
	# Each entry is a pair (suffixes, styles). suffixes is a list of file name
	# suffixes: if a filename ends with one of them, we'll use the styles in
	# styles. styles is either a single string or a list of strings. Every
	# string must be a key of COMMENT_STYLES; together they give the different
	# comment styles that are acceptable for the file type.
	#
	# These rules are given in priority order. Tuples higher in the list are
	# matched before those later in the list, on purpose.
	#
	# Files that either do not match any suffix or that have an empty list of
	# styles are not checked for a licence.
	COMMENT_CHARS = [
	    # Hardware Files
	    ([".svh", ".sv", ".sv.tpl"], SLASH_SLASH),  # SystemVerilog

	    # Hardware Build Systems
	    ([".tcl", ".sdc"], HASH),  # tcl
	    ([".core", ".core.tpl"], 'corefile'),  # FuseSoC Core Files
	    (["Makefile", ".mk"], HASH),  # Makefiles
	    ([".ys"], HASH),  # Yosys script
	    ([".waiver"], HASH),  # AscentLint waiver files
	    ([".vlt"], SLASH_SLASH),  # Verilator configuration (waiver) files
	    ([".vbl"], HASH),  # Verible configuration files
	    ([".el", ".el.tpl"], SLASH_SLASH),  # Exclusion list
	    ([".cfg", ".cfg.tpl"], [SLASH_SLASH,
	                            HASH]),  # Kinds of configuration files
	    ([".f"], []),  # File lists (not checked)

	    # The following two rules will inevitably bite us.
	    (["riviera_run.do"], HASH),  # Riviera dofile
	    ([".do"], SLASH_SLASH),  # Cadence LEC dofile

	    # Software Files
	    ([".c", ".c.tpl", ".h", ".h.tpl", ".cc", ".cpp"], SLASH_SLASH),  # C, C++
	    ([".def"], SLASH_SLASH),  # C, C++ X-Include List Declaration Files
	    ([".S"], [SLASH_SLASH, SLASH_STAR]),  # Assembly (With Preprocessing)
	    ([".s"], SLASH_STAR),  # Assembly (Without Preprocessing)
	    ([".ld", ".ld.tpl"], SLASH_STAR),  # Linker Scripts
	    ([".rs", ".rs.tpl"], SLASH_SLASH),  # Rust

	    # Software Build Systems
	    (["meson.build", "toolchain.txt", "meson_options.txt"], HASH),  # Meson

	    # General Tooling
	    ([".py"], HASH),  # Python
	    ([".sh"], HASH),  # Shell Scripts
	    (["Dockerfile"], HASH),  # Dockerfiles

	    # Configuration
	    ([".hjson", ".hjson.tpl"], SLASH_SLASH),  # hjson
	    ([".yml", ".yaml"], HASH),  # YAML
	    ([".toml"], HASH),  # TOML
	    (["-requirements.txt"], HASH),  # Apt and Python requirements files
	    (["redirector.conf"], HASH),  # nginx config

	    # Documentation
	    ([".md", ".md.tpl", ".html"], []),  # Markdown and HTML (not checked)
	    ([".css"], SLASH_STAR),  # CSS
	    ([".scss"], SLASH_SLASH),  # SCSS

	    # Templates (Last because there are overlaps with extensions above)
	    ([".tpl"], HASH),  # Mako templates
	]


	class LicenceMatcher:
	    '''An object to match a given licence at the start of a file.

	    comment_style is a CommentStyle-like object giving the comment syntax,
	    licence is the sequence of licence lines (it must also provide
	    first_word when match_regex is false) and match_regex says whether the
	    licence lines are regular expressions (true) or literal text (false).

	    Raises RuntimeError if match_regex is true and a licence line can't be
	    compiled as a regular expression.
	    '''
	    def __init__(self, comment_style, licence, match_regex):
	        self.style = comment_style
	        self.expected_lines = list()
	        if match_regex:
	            # With regex matching the licence lines are used "as is".
	            # Count from 1 so an error names the line a human would.
	            for i, ll in enumerate(licence, start=1):
	                try:
	                    self.expected_lines.append(
	                        comment_style.full_line_pattern(ll))
	                # Catch any regex error here and raise a runtime error.
	                except re.error as e:
	                    raise RuntimeError(
	                        "Can't compile line {} of the licence as a regular expression. Saw `{}`: {}"
	                        .format(i, self._offending_text(e, ll),
	                                e.msg)) from e
	            # use the "first line" as a licence marker
	            self.search_marker = self.expected_lines[0]
	        else:
	            # For non-regex matching we need to escape everything.
	            # This can never throw an exception as everything has been
	            # escaped and therefore is always a legal regex.
	            self.search_marker = comment_style.search_line_pattern(
	                licence.first_word)
	            self.expected_lines = [
	                comment_style.full_line_pattern(re.escape(ll))
	                for ll in licence
	            ]

	        self.lines_left = []

	    @staticmethod
	    def _offending_text(error, licence_line):
	        '''Return the text to quote when reporting a regex compile error.

	        A re.error does not always carry a pattern and a position (both
	        may be None), so fall back to the whole licence line rather than
	        failing while building the error message.
	        '''
	        pattern, pos = error.pattern, error.pos
	        if pattern is None or pos is None or not 0 <= pos < len(pattern):
	            return licence_line
	        return pattern[pos]

	    def looks_like_first_line_comment(self, line):
	        '''True if line starts with the style's first-line prefix.'''
	        return line.startswith(self.style.first_line_prefix)

	    def looks_like_comment(self, line):
	        '''True if line starts with the style's comment prefix.'''
	        return line.startswith(self.style.comment_prefix)

	    def looks_like_first_line(self, line):
	        '''True if the start of line matches the licence search marker.'''
	        return self.search_marker.match(line) is not None

	    def start(self):
	        '''Reset lines_left, to match at the start of the licence'''
	        self.lines_left = self.expected_lines

	    def take_line(self, line):
	        '''Check whether line matches the next line of the licence.

	        Returns a pair (matched, done). matched is true if the line matched. If
	        this was the last line of the licence, done is true. On a match, this
	        drops the matched line from lines_left, so the next call to take_line
	        will match against the next line of the licence.

	        '''
	        # If we have no more lines to match, claim a match and that we're done.
	        # This shouldn't happen in practice, except if the configuration has an
	        # empty licence.
	        if not self.lines_left:
	            return (True, True)

	        if self.lines_left[0].fullmatch(line) is None:
	            return (False, False)

	        # Slicing builds a new list, so expected_lines is left untouched.
	        self.lines_left = self.lines_left[1:]
	        return (True, not self.lines_left)


	def detect_comment_char(all_matchers, filename):
	    '''Find zero or more LicenceMatcher objects for filename

	    all_matchers should be a dict like COMMENT_STYLES, but where the values are
	    the corresponding LicenceMatcher objects. The first COMMENT_CHARS entry
	    with a suffix that filename ends with decides the result; a filename
	    matching no entry gives an empty list.

	    '''
	    found = None
	    for (suffixes, keys) in COMMENT_CHARS:
	        if found is not None:
	            break
	        for suffix in suffixes:
	            if filename.endswith(suffix):
	                found = keys
	                break

	    if found is None:
	        return []

	    # A bare string stands for a one-element list of styles.
	    if not isinstance(found, list):
	        assert isinstance(found, str)
	        found = [found]

	    return [all_matchers[key] for key in found]


	def git_find_repo_toplevel():
	    '''Ask git for the top-level directory of the current repository.

	    Returns the resolved Path built from the output of
	    `git rev-parse --show-toplevel`.
	    '''
	    git_output = subprocess.check_output(
	        ['git', 'rev-parse', '--show-toplevel'])
	    return Path(git_output.decode().strip()).resolve()


	def git_find_all_file_paths(top_level, search_paths):
	    '''Yield a Path for every file git lists under search_paths.

	    Runs `git ls-files -z` inside top_level and joins each NUL-separated
	    entry of its output onto top_level. Nothing is yielded when git lists
	    no files.
	    '''
	    git_output = subprocess.check_output(
	        ["git", "-C",
	         str(top_level), "ls-files", "-z", "--", *search_paths])
	    for path in git_output.split(b"\0"):
	        # Splitting leaves an empty entry after the final NUL, and a lone
	        # empty entry when the output is empty. Skip those: joining one
	        # onto top_level would yield top_level itself.
	        if path:
	            yield Path(top_level, path.decode())


	class ResultsTracker(object):
	    """Helper for tracking results.

	    Counts the files that passed, failed, were skipped or were excluded,
	    and remembers the paths (relative to base_dir) of the failing files.
	    """
	    def __init__(self, base_dir):
	        self.base_dir = base_dir

	        self.passed_count = 0
	        self.failed_count = 0
	        self.excluded_count = 0
	        self.skipped_count = 0

	        # Created per instance: a set defined on the class would be shared
	        # by every tracker.
	        self.failing_paths = set()

	    @property
	    def total_count(self):
	        """Sum of the passed, failed, skipped and excluded counts."""
	        return (self.passed_count + self.failed_count + self.skipped_count +
	                self.excluded_count)

	    def passed(self, path, line_no, reason):
	        """Log (at debug level) and count a file that passed."""
	        rel_path = path.relative_to(self.base_dir)
	        logging.debug("%s:%d PASSED: %s", str(rel_path), line_no, reason)
	        self.passed_count += 1

	    def failed(self, path, line_no, reason):
	        """Log (at error level), count and remember a file that failed."""
	        rel_path = path.relative_to(self.base_dir)
	        logging.error("%s:%d FAILED: %s", str(rel_path), line_no, reason)
	        self.failing_paths.add(rel_path)
	        self.failed_count += 1

	    def skipped(self, path, reason):
	        """Log (at info level) and count a file that was skipped."""
	        rel_path = path.relative_to(self.base_dir)
	        logging.info("%s: SKIPPED: %s", str(rel_path), reason)
	        self.skipped_count += 1

	    def excluded(self, path, reason):
	        """Log (at debug level) and count a file that was excluded."""
	        rel_path = path.relative_to(self.base_dir)
	        logging.debug("%s: EXCLUDED: %s", str(rel_path), reason)
	        self.excluded_count += 1

	    def any_failed(self):
	        """True if at least one failure has been recorded."""
	        return self.failed_count > 0

	    def display_nicely(self):
	        """Return the counts formatted as a table by tabulate."""
	        headers = ["Results:", "Files"]
	        results = [["Passed", self.passed_count],
	                   ["Failed", self.failed_count],
	                   ["Skipped", self.skipped_count],
	                   ["Excluded", self.excluded_count],
	                   ["Total", self.total_count]]

	        return tabulate(results, headers, tablefmt="simple")


	def matches_exclude_pattern(config, file_path):
	    '''Tell whether file_path is covered by one of config.exclude_paths.

	    The path is taken relative to config.base_dir and compared against
	    each exclude pattern with fnmatch.
	    '''
	    rel_path = str(file_path.relative_to(config.base_dir))
	    for exclude_pattern in config.exclude_paths:
	        if fnmatch.fnmatch(rel_path, exclude_pattern):
	            return True
	    return False


	def check_paths(config, git_paths):
	    '''Check every file git lists under git_paths for the licence header.

	    config must provide base_dir, licence, match_regex and exclude_paths.
	    Symlinks and paths matching an exclude pattern are recorded as
	    excluded; other entries that are not regular files are ignored.

	    Returns the ResultsTracker holding the outcome. Raises SystemExit
	    (carrying the error) if the licence matchers can't be built.
	    '''
	    results = ResultsTracker(config.base_dir)
	    try:
	        all_matchers = {
	            key: LicenceMatcher(style, config.licence, config.match_regex)
	            for key, style in COMMENT_STYLES.items()
	        }
	    except RuntimeError as e:
	        # Raise SystemExit directly: the `exit` helper is added by the site
	        # module for interactive use and is not meant for programs.
	        raise SystemExit(e)

	    for filepath in git_find_all_file_paths(config.base_dir, git_paths):
	        # Skip symlinks (with message)
	        if filepath.is_symlink():
	            results.excluded(filepath, "File is a symlink")
	            continue

	        # Skip non-file
	        if not filepath.is_file():
	            continue

	        # Skip exclude patterns
	        if matches_exclude_pattern(config, filepath):
	            results.excluded(filepath, "Path matches exclude pattern")
	            continue

	        check_file_for_licence(all_matchers, results, filepath)

	    return results


	def check_file_for_licence(all_matchers, results, filepath):
	    '''Check one file for the licence and record the outcome in results.

	    The file is skipped if no comment style is known for its name or if
	    it is empty. Otherwise each acceptable comment style is tried in
	    turn: the first one that matches records a pass, and if none matches
	    a single failure is recorded for the file.
	    '''
	    matchers = detect_comment_char(all_matchers, filepath.name)

	    if not matchers:
	        results.skipped(filepath, "Unknown comment style")
	        return

	    if filepath.stat().st_size == 0:
	        results.skipped(filepath, "Empty file")
	        return

	    problems = []
	    for matcher in matchers:
	        good, line_num, msg = check_file_with_matcher(matcher, filepath)
	        if good:
	            results.passed(filepath, line_num, msg)
	            return
	        problems.append((line_num, msg))

	    # If we get here, we didn't find a matching licence. Record exactly one
	    # failure so the per-file counts stay right even when several comment
	    # styles were tried.
	    first_line_num, first_msg = problems[0]
	    if len(problems) == 1:
	        results.failed(filepath, first_line_num, first_msg)
	        return

	    reason = "; ".join("{} (line {})".format(msg, line_num)
	                       for line_num, msg in problems)
	    results.failed(filepath, first_line_num, reason)


	def check_file_with_matcher(matcher, filepath):
	    '''Check the file at filepath against matcher.

	    Returns a tuple (is_good, line_number, msg). is_good is True on success;
	    False on failure. line_number is the position where the licence was found
	    (on success) or where we gave up searching for it (on failure). msg is the
	    associated success or error message.

	    '''
	    eof_msg = "Reached end of file before finding licence"

	    with filepath.open() as handle:
	        # Lazily yields each line with trailing whitespace removed. A
	        # stripped line is never None, so None can mark the end of file.
	        lines = (raw.rstrip() for raw in handle)

	        # Get first line
	        line = next(lines, None)
	        if line is None:
	            return (False, 1, "Empty file")
	        line_no = 1

	        # Check first line against the first word of licence, or against a
	        # possible different first line.
	        if not matcher.looks_like_first_line(line):
	            if not matcher.looks_like_first_line_comment(line):
	                return (False, line_no, "File does not start with comment")

	            line = next(lines, None)
	            if line is None:
	                return (False, line_no, eof_msg)
	            line_no += 1

	        # Skip lines that don't seem to be the first line of the licence
	        while not matcher.looks_like_first_line(line):
	            line = next(lines, None)
	            if line is None:
	                return (False, line_no, eof_msg)
	            line_no += 1

	            if not matcher.looks_like_comment(line):
	                return (False, line_no,
	                        "First comment ended before licence notice")

	        # We found the marker, so we found the first line of the licence. The
	        # current line is in the first comment, so check the line matches the
	        # expected first line:
	        licence_assumed_start = line_no
	        matcher.start()
	        matched, done = matcher.take_line(line)
	        if not matched:
	            return (False, line_no, "Licence does not match")

	        while not done:
	            line = next(lines, None)
	            if line is None:
	                return (False, line_no, eof_msg)
	            line_no += 1

	            # Check against full expected line.
	            matched, done = matcher.take_line(line)
	            if not matched:
	                return (False, line_no, "Licence did not match")

	    return (True, licence_assumed_start, "Licence found")


	def main():
	    '''Command-line entry point: parse arguments, run the check and exit.

	    Reads the licence, the exclude patterns and the optional match_regex
	    setting from the HJSON file given by --config, checks the requested
	    paths and prints a summary. Exits with status 1 if match_regex is
	    invalid or any file failed the check, and with status 0 otherwise.
	    '''
	    desc = "A tool to check the lowRISC licence header is in each source file"
	    parser = argparse.ArgumentParser(description=desc)
	    parser.add_argument("--config",
	                        metavar="config.hjson",
	                        type=argparse.FileType('r', encoding='UTF-8'),
	                        required=True,
	                        help="HJSON file to read for licence configuration.")
	    parser.add_argument("paths",
	                        metavar="path",
	                        nargs='*',
	                        default=["."],
	                        help="Paths to check for licence headers.")
	    parser.add_argument('-v',
	                        "--verbose",
	                        action='store_true',
	                        dest='verbose',
	                        help="Verbose output")

	    options = parser.parse_args()

	    if options.verbose:
	        logging.basicConfig(format="%(levelname)s: %(message)s",
	                            level=logging.INFO)
	    else:
	        logging.basicConfig(format="%(levelname)s: %(message)s")

	    config = SimpleNamespace()
	    config.base_dir = git_find_repo_toplevel()

	    # argparse has already opened the file; close it once it is parsed.
	    with options.config as config_file:
	        parsed_config = hjson.load(config_file)

	    config.licence = LicenceHeader(parsed_config['licence'])
	    config.exclude_paths = set(parsed_config['exclude_paths'])
	    # Check whether we should use regex matching or full string matching.
	    match_regex = parsed_config.get('match_regex', 'false')
	    # An unquoted true/false in the config is loaded as a boolean rather
	    # than a string, so accept that spelling as well.
	    if isinstance(match_regex, bool):
	        match_regex = 'true' if match_regex else 'false'
	    if match_regex not in ['true', 'false']:
	        print('Invalid value for match_regex: {!r}. '
	              'Should be "true" or "false".'.format(match_regex))
	        raise SystemExit(1)
	    config.match_regex = match_regex == 'true'

	    results = check_paths(config, options.paths)

	    print(results.display_nicely())

	    if results.any_failed():
	        print("Failed:")
	        # Sort so the report does not depend on set iteration order.
	        for path in sorted(results.failing_paths):
	            print("  {}".format(str(path)))
	        print("")
	        raise SystemExit(1)

	    raise SystemExit(0)

	# Run the checker only when this file is executed as a script.
	if __name__ == '__main__':
	    main()