#!/usr/bin/env python3
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
5
6import argparse
7import fnmatch
8import logging as log
9import os
10import re
11import shutil
12import subprocess
13import sys
14import tempfile
15import textwrap
16from pathlib import Path
17
18import hjson
19
# Description passed to the argument parser built in main().
DESC = """vendor, copy source code from upstream into this repository"""

# Patterns which main() always appends to the list of files excluded from
# the import.
EXCLUDE_ALWAYS = ['.git']

# Comment block which main() writes at the top of the lock file before the
# serialized lock data.
LOCK_FILE_HEADER = """// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// This file is generated by the util/vendor script. Please do not modify it
// manually.

"""

# Set by main() from the --verbose command-line flag; read by the git helper
# functions to decide whether to pass -q/--verbose to git.
verbose = False
34
35
def git_is_clean_workdir(git_workdir):
    """Report whether a git working directory has no changes to tracked files.

    Runs "git status --porcelain" (with untracked files ignored) inside
    git_workdir and returns True if it prints nothing, i.e. there are neither
    staged nor unstaged changes. Raises subprocess.CalledProcessError if git
    exits with a non-zero status.
    """
    status = subprocess.run(
        ['git', 'status', '--untracked-files=no', '--porcelain'],
        cwd=str(git_workdir),
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    # Porcelain output has one line per changed file; no output means clean.
    porcelain_output = status.stdout.strip()
    return not porcelain_output
45
46
def path_resolve(path, base_dir=None):
    """Create an absolute path.

    path may be a string or a path object. Absolute paths are returned
    unchanged (as a Path). Relative paths are joined onto base_dir and
    resolved.

    base_dir defaults to the current working directory, looked up when the
    function is called (not when the module is imported), so a change of
    working directory after import is honoured.
    """
    path = Path(path)
    if path.is_absolute():
        return path

    if base_dir is None:
        base_dir = Path.cwd()

    return (Path(base_dir) / path).resolve()
57
58
def github_qualify_references(log, repo_userorg, repo_name):
    """Turn "unqualified" GitHub references into "fully qualified" ones.

    GitHub auto-links issue and pull request references. A reference is
    qualified if it names the user/org and the repository (userorg/name#123),
    and unqualified if it consists of the number only (#123 or GH-123).

    Every unqualified reference in the list of strings log is rewritten to
    repo_userorg/repo_name#NUMBER; everything else is left untouched. A new
    list is returned.

    See https://help.github.com/en/articles/autolinked-references-and-urls#issues-and-pull-requests
    for a documentation of all supported formats.
    """

    # Group 1 keeps the character in front of the reference (or the start of
    # the line), group 2 is the issue/PR number.
    unqualified_ref = re.compile(r"(^|[^\w])(?:#|[gG][hH]-)(\d+)\b")
    qualified_ref = r'\1%s/%s#\2' % (repo_userorg, repo_name)

    qualified_log = []
    for entry in log:
        qualified_log.append(unqualified_ref.sub(qualified_ref, entry))
    return qualified_log
75
76
def test_github_qualify_references():
    """Unit test for github_qualify_references()."""
    userorg, name = 'lowRISC', 'ibex'

    # Unqualified references, should be replaced: (input, expected output)
    rewritten = [
        ('#28', 'lowRISC/ibex#28'),
        ('GH-27', 'lowRISC/ibex#27'),
        ('klaus #27', 'klaus lowRISC/ibex#27'),
        ('Fixes #27', 'Fixes lowRISC/ibex#27'),
        ('Fixes #27 and #28', 'Fixes lowRISC/ibex#27 and lowRISC/ibex#28'),
        ('(#27)', '(lowRISC/ibex#27)'),
        ('something (#27) done', 'something (lowRISC/ibex#27) done'),
        ('#27 and (GH-38)', 'lowRISC/ibex#27 and (lowRISC/ibex#38)'),
    ]
    inputs = [before for before, _ in rewritten]
    expected = [after for _, after in rewritten]
    assert github_qualify_references(inputs, userorg, name) == expected

    untouched_groups = [
        # Qualified references, should stay as they are
        ['Fixes lowrisc/ibex#27', 'lowrisc/ibex#2'],
        # Invalid references, should stay as they are
        ['something#27', 'lowrisc/ibex#'],
    ]
    for untouched in untouched_groups:
        assert github_qualify_references(untouched, userorg,
                                         name) == untouched
120
121
def test_github_parse_url():
    """Unit test for github_parse_url()."""
    # Not a GitHub URL at all.
    assert github_parse_url('https://example.com/something/asdf.git') is None

    # HTTPS (with and without .git suffix) and SSH style GitHub URLs.
    github_urls = (
        'https://github.com/lowRISC/ibex.git',
        'https://github.com/lowRISC/ibex',
        'git@github.com:lowRISC/ibex.git',
    )
    for url in github_urls:
        assert github_parse_url(url) == ('lowRISC', 'ibex')
130
131
def github_parse_url(github_repo_url):
    """Parse a GitHub repository URL into its parts.

    Both HTTPS style (https://github.com/userorg/name) and SSH style
    (git@github.com:userorg/name) URLs are accepted, with or without a
    trailing ".git".

    Return a tuple (userorg, name), or None if the parsing failed.
    """

    # User/org names consist of alphanumerics and dashes. Repository names
    # may additionally contain underscores and dots, so the name is matched
    # non-greedily to keep an optional ".git" suffix out of it.
    regex = (r"(?:@github\.com:|/github\.com/)"
             r"([a-zA-Z\d-]+)/([a-zA-Z\d._-]+?)(?:\.git)?$")
    m = re.search(regex, github_repo_url)
    if m is None:
        return None
    return (m.group(1), m.group(2))
143
144
def produce_shortlog(clone_dir, old_rev, new_rev):
    """Produce a list of changes between two revisions, one revision per line.

    Runs "git log" in clone_dir for the range old_rev..new_rev, restricted to
    changes below clone_dir. Each entry has the form "subject (author name)".
    Merges are excluded.

    Returns a list of strings. If git fails, the error is logged and an empty
    list is returned, so callers always get a list.
    """
    cmd = [
        'git', '-C',
        str(clone_dir), 'log', '--pretty=format:%s (%aN)', '--no-merges',
        old_rev + '..' + new_rev, '.'
    ]
    try:
        proc = subprocess.run(cmd,
                              cwd=str(clone_dir),
                              check=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True)
    except subprocess.CalledProcessError as e:
        log.error("Unable to capture shortlog: %s", e.stderr)
        return []
    return proc.stdout.splitlines()
165
166
def format_list_to_str(list, width=70):
    """Create a Markdown-style bullet list string from a list of strings.

    Each item becomes one bullet ("* item"), wrapped to at most width
    columns. Continuation lines are indented to line up with the text after
    the bullet. Items are joined with newlines; an empty input gives an empty
    string.
    """
    bullet = "* "
    wrapper = textwrap.TextWrapper(initial_indent=bullet,
                                   # Hanging indent as wide as the bullet.
                                   subsequent_indent=" " * len(bullet),
                                   width=width)
    return '\n'.join(wrapper.fill(s) for s in list)
173
174
def refresh_patches(desc):
    """Regenerate the patch files of a vendored repository.

    desc is the vendoring description. It must contain 'patch_repo' (with
    the keys 'url', 'rev_base' and 'rev_patched'), 'patch_dir' and
    '_base_dir'. All *.patch files in the patch directory are deleted and
    re-exported from the patch repository.

    Exits the program with status 1 if 'patch_repo' or 'patch_dir' is missing
    from desc.
    """
    for required_key in ('patch_repo', 'patch_dir'):
        if required_key not in desc:
            log.fatal('Unable to refresh patches, %s not set in config.',
                      required_key)
            sys.exit(1)

    patch_dir_abs = path_resolve(desc['patch_dir'], desc['_base_dir'])
    log.info('Refreshing patches in %s', str(patch_dir_abs))

    # remove existing patches
    for patch in patch_dir_abs.glob('*.patch'):
        os.unlink(str(patch))

    # get current patches
    patch_repo = desc['patch_repo']
    _export_patches(patch_repo['url'], patch_dir_abs, patch_repo['rev_base'],
                    patch_repo['rev_patched'])
191
192
def _export_patches(patchrepo_clone_url, target_patch_dir, upstream_rev,
                    patched_rev):
    """Export the commits of a patch repository as patch files.

    Clones patchrepo_clone_url into a temporary directory and runs
    "git format-patch" for the range origin/upstream_rev..origin/patched_rev,
    writing the patch files to target_patch_dir. The temporary clone is
    removed afterwards, also on failure.
    """
    scratch_clone = Path(tempfile.mkdtemp())
    try:
        clone_git_repo(patchrepo_clone_url, scratch_clone, patched_rev)

        format_patch_cmd = [
            'git', 'format-patch', '-o',
            str(target_patch_dir),
            'origin/' + upstream_rev + '..' + 'origin/' + patched_rev
        ]
        if not verbose:
            format_patch_cmd.append('-q')
        subprocess.run(format_patch_cmd, cwd=str(scratch_clone), check=True)
    finally:
        shutil.rmtree(str(scratch_clone), ignore_errors=True)
206
207
def import_from_upstream(upstream_path, target_path, exclude_files=None):
    """Replace target_path with a copy of upstream_path.

    Any existing directory at target_path is deleted first. exclude_files is
    an optional list of fnmatch-style patterns, relative to upstream_path,
    naming files and directories which are not copied.
    """
    if exclude_files is None:
        exclude_files = []

    log.info('Copying upstream sources to %s', target_path)
    # remove existing directories before importing them again
    shutil.rmtree(str(target_path), ignore_errors=True)

    # import new contents into the target directory
    _cp_from_upstream(upstream_path, target_path, exclude_files)
215
216
def apply_patch(basedir, patchfile, strip_level=1):
    """Apply patchfile with "git apply", run from within basedir.

    strip_level is the number of leading path components removed from the
    file names in the patch (the -p option of git apply). Raises
    subprocess.CalledProcessError if the patch does not apply.
    """
    git_apply = ['git', 'apply', '-p' + str(strip_level), str(patchfile)]
    if verbose:
        git_apply.append('--verbose')
    subprocess.run(git_apply, cwd=str(basedir), check=True)
lowRISC Contributors802543a2019-08-31 12:12:56 +0100222
223
def clone_git_repo(repo_url, clone_dir, rev='master'):
    """Clone a git repository and check out the requested revision.

    The whole repository (all branches) is cloned from repo_url into
    clone_dir, then rev is checked out. rev may be a commit hash, a tag or a
    branch name.

    Returns the full commit hash which HEAD points to after the checkout.
    Raises subprocess.CalledProcessError if any git invocation fails.
    """
    log.info('Cloning upstream repository %s @ %s', repo_url, rev)

    # Clone the whole repository
    cmd = ['git', 'clone', '--no-single-branch']
    if not verbose:
        cmd += ['-q']
    cmd += [repo_url, str(clone_dir)]
    subprocess.run(cmd, check=True)

    # Check out exactly the revision requested. "checkout" is used rather
    # than "reset --hard" because a fresh clone only has a local branch for
    # the default branch: the name of any other branch exists only as
    # origin/<name>, which "reset" cannot resolve but "checkout" can.
    cmd = ['git', '-C', str(clone_dir), 'checkout', '--force']
    if not verbose:
        cmd += ['-q']
    cmd += [rev]
    subprocess.run(cmd, check=True)

    # Get revision information
    cmd = ['git', '-C', str(clone_dir), 'rev-parse', 'HEAD']
    rev = subprocess.run(cmd,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         check=True,
                         universal_newlines=True).stdout.strip()
    log.info('Cloned at revision %s', rev)
    return rev
249
250
def git_get_short_rev(clone_dir, rev):
    """Get the shortened SHA-1 hash for a revision.

    Asks git (run against the repository in clone_dir) to abbreviate rev and
    returns its output with surrounding whitespace removed. Raises
    subprocess.CalledProcessError if git fails.
    """
    rev_parse = subprocess.run(
        ['git', '-C', str(clone_dir), 'rev-parse', '--short', rev],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
        universal_newlines=True)
    return rev_parse.stdout.strip()
260
261
def git_add_commit(repo_base, paths, commit_msg):
    """Stage and commit all changes in paths.

    Every entry of paths is staged with its own "git add" call in the
    repository at repo_base; a failure there raises
    subprocess.CalledProcessError. The commit is then created with a
    Signed-off-by line, reading commit_msg from standard input. A failing
    commit only produces a warning.
    """
    git_in_repo = ['git', '-C', str(repo_base)]

    # Stage all changes
    for path in paths:
        subprocess.run(git_in_repo + ['add', str(path)], check=True)

    # "-F -" makes git read the commit message from stdin.
    try:
        subprocess.run(git_in_repo + ['commit', '-s', '-F', '-'],
                       check=True,
                       universal_newlines=True,
                       input=commit_msg)
    except subprocess.CalledProcessError:
        log.warning("Unable to create commit. Are there no changes?")
278
279
def ignore_patterns(base_dir, *patterns):
    """Similar to shutil.ignore_patterns, but with support for directory excludes.

    Returns a callable suitable for the ignore argument of shutil.copytree.
    Unlike shutil.ignore_patterns, the fnmatch-style patterns are matched
    against the path of each entry relative to base_dir, not just against
    its name.
    """
    def _ignore_patterns(path, names):
        ignored = set()
        for name in names:
            rel_name = os.path.relpath(os.path.join(path, name), base_dir)
            if any(fnmatch.fnmatch(rel_name, pat) for pat in patterns):
                ignored.add(name)
        return ignored

    return _ignore_patterns
296
297
def _cp_from_upstream(src, dest, exclude=None):
    """Recursively copy the directory src to dest.

    exclude is an optional sequence of fnmatch-style patterns, relative to
    src, naming the files and directories which are skipped.
    """
    shutil.copytree(str(src),
                    str(dest),
                    ignore=ignore_patterns(str(src), *(exclude or ())))
302
303
def main(argv):
    """Entry point of the vendor tool.

    argv is the full argument vector in sys.argv form, i.e. argv[0] is the
    program name and the command-line arguments start at argv[1].

    Reads the vendoring description file, clones the upstream repository
    (at the revision from the lock file, or at the revision from the
    description file when updating), applies local patches, copies the
    result into the target directory, writes the lock file when updating
    and optionally commits the result.

    Raises SystemExit on invalid input or a failed precondition, and
    subprocess.CalledProcessError if a git invocation fails.
    """
    parser = argparse.ArgumentParser(prog="vendor", description=DESC)
    parser.add_argument(
        '--update',
        '-U',
        dest='update',
        action='store_true',
        help='Update locked version of repository with upstream changes')
    parser.add_argument('--refresh-patches',
                        action='store_true',
                        help='Refresh the patches from the patch repository')
    parser.add_argument('--commit',
                        '-c',
                        action='store_true',
                        help='Commit the changes')
    parser.add_argument('desc_file',
                        metavar='file',
                        type=argparse.FileType('r', encoding='UTF-8'),
                        help='vendoring description file (*.vendor.hjson)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose')
    # Parse the arguments we were given, skipping the program name.
    args = parser.parse_args(argv[1:])

    global verbose
    verbose = args.verbose
    if verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")

    desc_file_path = Path(args.desc_file.name).resolve()
    vendor_file_base_dir = desc_file_path.parent

    # Precondition: Ensure description file matches our naming rules
    if not str(desc_file_path).endswith('.vendor.hjson'):
        log.fatal("Description file names must have a .vendor.hjson suffix.")
        raise SystemExit(1)

    # Precondition: Check for a clean working directory when commit is requested
    if args.commit:
        if not git_is_clean_workdir(vendor_file_base_dir):
            log.fatal("A clean git working directory is required for "
                      "--commit/-c. git stash your changes and try again.")
            raise SystemExit(1)

    # Load description file (and close the handle argparse opened for us)
    try:
        with args.desc_file:
            desc = hjson.loads(args.desc_file.read(), use_decimal=True)
    except ValueError as e:
        raise SystemExit(e)
    desc['_base_dir'] = vendor_file_base_dir

    # foo.vendor.hjson -> foo.lock.hjson
    desc_file_stem = desc_file_path.name.rsplit('.', 2)[0]
    lock_file_path = desc_file_path.with_name(desc_file_stem + '.lock.hjson')

    # Importing may use the upstream information from the lock file, so make
    # a copy now which we can overwrite with that information.
    import_desc = desc.copy()

    # Load lock file contents (if possible)
    try:
        # The lock file is written as UTF-8 below, so read it as UTF-8 too.
        with open(str(lock_file_path), 'r', encoding='UTF-8') as f:
            lock = hjson.loads(f.read(), use_decimal=True)

        # Use lock file information for import
        if not args.update:
            import_desc['upstream'] = lock['upstream'].copy()
    except FileNotFoundError:
        lock = None
        if not args.update:
            log.warning("Updating upstream repo as lock file %s not found.",
                        str(lock_file_path))
            args.update = True

    if args.refresh_patches:
        refresh_patches(import_desc)

    clone_dir = Path(tempfile.mkdtemp())
    try:
        # clone upstream repository
        upstream_new_rev = clone_git_repo(import_desc['upstream']['url'],
                                          clone_dir,
                                          rev=import_desc['upstream']['rev'])

        if not args.update:
            if upstream_new_rev != lock['upstream']['rev']:
                log.fatal(
                    "Revision mismatch. Unable to re-clone locked version of repository."
                )
                log.fatal("Attempted revision: %s",
                          import_desc['upstream']['rev'])
                log.fatal("Re-cloned revision: %s", upstream_new_rev)
                raise SystemExit(1)

        upstream_only_subdir = ''
        clone_subdir = clone_dir
        if 'only_subdir' in import_desc['upstream']:
            upstream_only_subdir = import_desc['upstream']['only_subdir']
            clone_subdir = clone_dir / upstream_only_subdir
            if not clone_subdir.is_dir():
                log.fatal("subdir '%s' does not exist in repo",
                          upstream_only_subdir)
                raise SystemExit(1)

        # apply patches to upstream sources
        if 'patch_dir' in import_desc:
            patches = path_resolve(import_desc['patch_dir'],
                                   vendor_file_base_dir).glob('*.patch')
            for patch in sorted(patches):
                log.info("Applying patch %s", str(patch))
                apply_patch(clone_subdir, patch)

        # import selected (patched) files from upstream repo
        exclude_files = []
        if 'exclude_from_upstream' in import_desc:
            exclude_files += import_desc['exclude_from_upstream']
        exclude_files += EXCLUDE_ALWAYS

        import_from_upstream(
            clone_subdir,
            path_resolve(import_desc['target_dir'], vendor_file_base_dir),
            exclude_files)

        # get shortlog
        get_shortlog = bool(args.update)
        if lock is None:
            get_shortlog = False
            log.warning("No lock file %s: unable to summarize changes.",
                        str(lock_file_path))
        elif lock['upstream']['url'] != import_desc['upstream']['url']:
            get_shortlog = False
            log.warning(
                "The repository URL changed since the last run. Unable to get log of changes."
            )

        shortlog = None
        if get_shortlog:
            shortlog = produce_shortlog(clone_subdir, lock['upstream']['rev'],
                                        upstream_new_rev)

            # Ensure fully-qualified issue/PR references for GitHub repos
            gh_repo_info = github_parse_url(import_desc['upstream']['url'])
            if gh_repo_info:
                shortlog = github_qualify_references(shortlog, gh_repo_info[0],
                                                     gh_repo_info[1])

            log.info("Changes since the last import:\n" +
                     format_list_to_str(shortlog))

        # write lock file
        if args.update:
            lock = {}
            lock['upstream'] = import_desc['upstream'].copy()
            lock['upstream']['rev'] = upstream_new_rev
            with open(str(lock_file_path), 'w', encoding='UTF-8') as f:
                f.write(LOCK_FILE_HEADER)
                hjson.dump(lock, f)
                f.write("\n")
            log.info("Wrote lock file %s", str(lock_file_path))

        # Commit changes
        if args.commit:
            sha_short = git_get_short_rev(clone_subdir, upstream_new_rev)

            repo_info = github_parse_url(import_desc['upstream']['url'])
            if repo_info is not None:
                sha_short = "%s/%s@%s" % (repo_info[0], repo_info[1],
                                          sha_short)

            commit_msg_subject = 'Update %s to %s' % (import_desc['name'],
                                                      sha_short)
            subdir_msg = ' '
            if upstream_only_subdir:
                subdir_msg = ' subdir %s in ' % upstream_only_subdir
            intro = 'Update code from%supstream repository %s to revision %s' % (
                subdir_msg, import_desc['upstream']['url'], upstream_new_rev)
            commit_msg_body = textwrap.fill(intro, width=70)

            if shortlog:
                commit_msg_body += "\n\n"
                commit_msg_body += format_list_to_str(shortlog, width=70)

            commit_msg = commit_msg_subject + "\n\n" + commit_msg_body

            commit_paths = []
            commit_paths.append(
                path_resolve(import_desc['target_dir'], vendor_file_base_dir))
            if args.refresh_patches:
                commit_paths.append(
                    path_resolve(import_desc['patch_dir'],
                                 vendor_file_base_dir))
            commit_paths.append(lock_file_path)

            git_add_commit(vendor_file_base_dir, commit_paths, commit_msg)

    finally:
        # Always remove the temporary clone, also on failure.
        shutil.rmtree(str(clone_dir), ignore_errors=True)

    log.info('Import finished')
498
499
if __name__ == '__main__':
    try:
        main(sys.argv)
    except subprocess.CalledProcessError as proc_err:
        # Report which external command failed, together with whatever
        # output was captured from it, then let the exception propagate.
        failed_cmd = " ".join(proc_err.cmd)
        log.fatal("Called program '%s' returned with %d.\n"
                  "STDOUT:\n%s\n"
                  "STDERR:\n%s\n" %
                  (failed_cmd, proc_err.returncode, proc_err.stdout,
                   proc_err.stderr))
        raise
508 raise