[util] Catch more problems with commit lint
This improves the commit lint script to catch a couple more problems:
* The commit message is now checked:
* We require an empty second line, separating the summary (first line)
from the optional long description (lines 3-). (In fact, git
shortlog assumes that everything before the first empty line is the
summary, leading to a summary line containing the whole commit
message if there's no empty line in between.)
* The summary line should not be overly long. The often-recommended
limit is 50 characters, but there is no hard rule. This commit enforces
a 100 character limit on the first line, which should be lenient
enough.
* There's now a warning if the author name does not contain a space.
That typically indicates that the user didn't set his real name but a
GitHub username as name. It's not an error because there's no
universal rule that names must consist of multiple words.
In addition to these functional changes, the implementation switched to
use GitPython, avoiding manually calling git and parsing its output.
Note: Merge commits are not checked in our CI setup. When Azure
Pipelines runs, it tests the PR merge into origin/master. The merge
commit is added by GitHub with author information as set in the GitHub
UI: the full name (if set) or the GitHub username, and the public email
address. This commit will not appear in our repository when doing
"rebase merges". Checking merge commits would check something other than
the commits as we ultimately see them in our repo, raising false alarm.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 5b94b7e..22b10b5 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -63,10 +63,22 @@
# Run it only on pull requests.
condition: eq(variables['Build.Reason'], 'PullRequest')
displayName: 'Use clang-format to check C/C++ coding style'
-
+
- bash: |
commit_range=$(git merge-base --fork-point origin/master)..HEAD
- ./util/lint_commits.py --error-msg-prefix='##vso[task.logissue type=error]' $commit_range
+ # Notes:
+ # * Merge commits are not checked. We always use rebases instead of
+ # merges to keep a linear history, which makes merge commits disappear
+ # ultimately, making them only a CI artifact which should not be
+ # checked.
+ # * 'type=error' is used even for warnings. Only "errors" are shown in
+ # the GitHub checks API. However, warnings don't return a non-zero
+ # error code and don't fail the build step.
+ ./util/lint_commits.py \
+ --no-merges \
+ --error-msg-prefix='##vso[task.logissue type=error]' \
+ --warning-msg-prefix='##vso[task.logissue type=error]' \
+ $commit_range
# Only run on pull requests to check new commits only
condition: eq(variables['Build.Reason'], 'PullRequest')
displayName: "Check commit metadata"
diff --git a/python-requirements.txt b/python-requirements.txt
index bdde800..7ccf2a2 100644
--- a/python-requirements.txt
+++ b/python-requirements.txt
@@ -9,6 +9,7 @@
pyyaml
pytest
fusesoc
+gitpython
# Develpment version to get around YAML parser warning triggered by fusesoc
# Upstream tracking: https://github.com/olofk/ipyxact/issues/19
diff --git a/util/lint_commits.py b/util/lint_commits.py
index bc9a462..3289f20 100755
--- a/util/lint_commits.py
+++ b/util/lint_commits.py
@@ -7,51 +7,129 @@
import subprocess
import sys
-error_msg_prefix = ''
+from git import Repo
+
+error_msg_prefix = 'ERROR: '
+warning_msg_prefix = 'WARNING: '
+
+# Maximum length of the summary line in the commit message (the first line)
+# There is no hard limit, but a typical convention is to keep this line at or
+# below 50 characters, with occasional outliers.
+COMMIT_MSG_MAX_SUMMARAY_LEN = 100
-def error(msg, *args, **kwargs):
- print(error_msg_prefix + msg, file=sys.stderr)
+def error(msg, commit=None):
+ full_msg = msg
+ if commit:
+ full_msg = "Commit %s: %s" % (commit.hexsha, msg)
+ print(error_msg_prefix + full_msg, file=sys.stderr)
+
+
+def warning(msg, commit=None):
+ full_msg = msg
+ if commit:
+ full_msg = "Commit %s: %s" % (commit.hexsha, msg)
+ print(warning_msg_prefix + full_msg, file=sys.stderr)
+
+
+def lint_commit_author(commit):
+ success = True
+ if commit.author.email.endswith('users.noreply.github.com'):
+ error(
+ 'Commit author has no valid email address set: %s. '
+ 'Use "git config user.email user@example.com" to '
+ 'set a valid email address, and update the commit '
+ 'with "git rebase -i" and/or '
+ '"git commit --amend --reset-author". '
+ 'Also check your GitHub settings at '
+ 'https://github.com/settings/emails: your email address '
+ 'must be verified, and the option "Keep my email address '
+ 'private" must be disabled.' % (commit.author.email, ), commit)
+ success = False
+
+ if not ' ' in commit.author.name:
+ warning(
+ 'The commit author name "%s" does contain no space. '
+ 'Use "git config user.name \'Johnny English\'" to '
+ 'set your real name, and update the commit with "git rebase -i " '
+ 'and/or "git commit --amend --reset-author".' %
+ (commit.author.name, ), commit)
+ # A warning doesn't fail lint.
+
+ return success
+
+
+def lint_commit_message(commit):
+ success = True
+ lines = commit.message.splitlines()
+
+ # Check length of summary line.
+ summary_line_len = len(lines[0])
+ if summary_line_len > COMMIT_MSG_MAX_SUMMARAY_LEN:
+ error(
+ "The summary line in the commit message %d characters long, "
+ "only %d characters are allowed." %
+ (summary_line_len, COMMIT_MSG_MAX_SUMMARAY_LEN), commit)
+ success = False
+
+ # Check for an empty line separating the summary line from the long
+ # description.
+ if len(lines) > 1 and lines[1] != "":
+ error(
+ "The second line of a commit message must be empty, as it "
+ "separates the summary from the long description.", commit)
+ success = False
+ return success
+
+
+def lint_commit(commit):
+ success = True
+ if not lint_commit_author(commit):
+ success = False
+ if not lint_commit_message(commit):
+ success = False
+ return success
def main():
+ global error_msg_prefix
+ global warning_msg_prefix
+
parser = argparse.ArgumentParser(
description='Check commit meta data for common mistakes')
parser.add_argument('--error-msg-prefix',
- default='ERROR: ',
+ default=error_msg_prefix,
required=False,
help='string to prepend to all error messages')
- parser.add_argument('commitrange',
+ parser.add_argument('--warning-msg-prefix',
+ default=warning_msg_prefix,
+ required=False,
+ help='string to prepend to all warning messages')
+ parser.add_argument('--no-merges',
+ required=False,
+ action="store_true",
+ help='do not check commits with more than one parent')
+ parser.add_argument('commit_range',
metavar='commit-range',
help='git log-compatible commit range to check')
args = parser.parse_args()
- global error_msg_prefix
error_msg_prefix = args.error_msg_prefix
+ warning_msg_prefix = args.warning_msg_prefix
- cmd = ['git', 'log', '--pretty=%H;%ae;%an', args.commitrange]
- commits = subprocess.run(cmd,
- stdout=subprocess.PIPE,
- check=True,
- universal_newlines=True).stdout
+ lint_successful = True
- has_error = False
- for commit in commits.splitlines():
- (sha, author_email, author_name) = commit.split(';', 3)
- print("Checking commit %s by %s <%s>" %
- (sha, author_name, author_email))
- if author_email.endswith('users.noreply.github.com'):
- error('Author of commit %s has no valid email address set: %s. '
- 'Use "git config user.email user@example.com" to '
- 'set a valid email address, and update the commit '
- 'with "git rebase -i" and/or '
- '"git commit --amend --reset-author". '
- 'You also need to disable "Keep my email address '
- 'private" in the GitHub email settings.' %
- (sha, author_email))
- has_error = True
+ repo = Repo()
+ commits = repo.iter_commits(args.commit_range)
+ for commit in commits:
+ print("Checking commit %s" % commit.hexsha)
+ is_merge = len(commit.parents) > 1
+ if is_merge and args.no_merges:
+ print("Skipping merge commit.")
+ if not lint_commit(commit):
+ lint_successful = False
- if has_error:
+ if not lint_successful:
error('Commit lint failed.')
sys.exit(1)