blob: 8c677f92887b15cd7976072b79f267b28153aa14 [file] [log] [blame]
lowRISC Contributors802543a2019-08-31 12:12:56 +01001#!/usr/bin/env python3
2# Copyright lowRISC contributors.
3# Licensed under the Apache License, Version 2.0, see LICENSE for details.
4# SPDX-License-Identifier: Apache-2.0
5
Rupert Swarbrick002f6252020-05-13 12:15:39 +01006'''A tool to copy source code from upstream into this repository.
7
8For an introduction to using this tool, see doc/ug/vendor_hw.md in this
9repository (on the internet at https://docs.opentitan.org/doc/ug/vendor_hw/).
10
11For full documentation, see doc/rm/vendor_in_tool.md (on the internet at
12https://docs.opentitan.org/doc/rm/vendor_in_tool).
13
14'''
15
lowRISC Contributors802543a2019-08-31 12:12:56 +010016import argparse
17import fnmatch
18import logging as log
19import os
20import re
21import shutil
22import subprocess
23import sys
24import tempfile
25import textwrap
26from pathlib import Path
27
28import hjson
29
# Paths that are never copied from the upstream checkout.
EXCLUDE_ALWAYS = ['.git']

# Text written at the top of every generated lock file.
LOCK_FILE_HEADER = """// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// This file is generated by the util/vendor script. Please do not modify it
// manually.

"""

# Keys in the description (configuration) file which can be overridden through
# the command line. Each entry is a dotted path into the description data; the
# last component must be a field that the corresponding class actually reads
# (the Upstream class reads 'url' and 'rev', PatchRepo reads 'url', 'rev_base'
# and 'rev_patched').
OVERRIDABLE_DESC_KEYS = [
    'patch_repo.url',
    'patch_repo.rev_base',
    'patch_repo.rev_patched',
    'upstream.url',
    'upstream.rev',
]

# Set from the --verbose command line flag in main(); read by the git helpers
# to decide whether to pass -q / --verbose to git.
verbose = False
52
53
def git_is_clean_workdir(git_workdir):
    """Return True if git reports no staged or unstaged changes in git_workdir

    Untracked files are not considered. Raises
    subprocess.CalledProcessError if the git invocation fails.
    """
    status = subprocess.run(
        ['git', 'status', '--untracked-files=no', '--porcelain'],
        cwd=str(git_workdir),
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # --porcelain prints one line per modified file; no output means clean.
    porcelain_output = status.stdout.strip()
    return not porcelain_output
63
64
def github_qualify_references(log, repo_userorg, repo_name):
    """ Turn "unqualified" GitHub references into "fully qualified" ones

    GitHub automatically links issues and pull requests written in specific
    formats. A reference is "qualified" if it names the user/org and the
    repository (userorg/repo#123) and "unqualified" if it only contains the
    issue or pull request number (#123 or GH-123).

    log is an iterable of strings (note that this parameter shadows the
    module-level logging alias inside this function). A new list is returned
    in which every unqualified reference has been rewritten as
    repo_userorg/repo_name#NUMBER.

    See https://help.github.com/en/articles/autolinked-references-and-urls#issues-and-pull-requests
    for a documentation of all supported formats.
    """
    # Group 1 is the start of line or a single non-word character preceding
    # the reference, so that things like "something#27" are left alone.
    unqualified_ref = re.compile(r"(^|[^\w])(?:#|[gG][hH]-)(\d+)\b")
    replacement = r'\1%s/%s#\2' % (repo_userorg, repo_name)

    qualified = []
    for entry in log:
        qualified.append(unqualified_ref.sub(replacement, entry))
    return qualified
81
82
def test_github_qualify_references():
    """Check github_qualify_references() on a table of sample log lines"""
    userorg = 'lowRISC'
    name = 'ibex'

    def qualify(items):
        return github_qualify_references(items, userorg, name)

    # Unqualified references, should be replaced
    rewritten = [
        ('#28', 'lowRISC/ibex#28'),
        ('GH-27', 'lowRISC/ibex#27'),
        ('klaus #27', 'klaus lowRISC/ibex#27'),
        ('Fixes #27', 'Fixes lowRISC/ibex#27'),
        ('Fixes #27 and #28', 'Fixes lowRISC/ibex#27 and lowRISC/ibex#28'),
        ('(#27)', '(lowRISC/ibex#27)'),
        ('something (#27) done', 'something (lowRISC/ibex#27) done'),
        ('#27 and (GH-38)', 'lowRISC/ibex#27 and (lowRISC/ibex#38)'),
    ]
    given = [before for before, _ in rewritten]
    expected = [after for _, after in rewritten]
    assert qualify(given) == expected

    # Qualified references, should stay as they are
    already_qualified = [
        'Fixes lowrisc/ibex#27',
        'lowrisc/ibex#2',
    ]
    assert qualify(already_qualified) == already_qualified

    # Invalid references, should stay as they are
    not_references = [
        'something#27',
        'lowrisc/ibex#',
    ]
    assert qualify(not_references) == not_references
126
127
def test_github_parse_url():
    """Check github_parse_url() on HTTPS, SSH and non-GitHub URLs"""
    cases = [
        ('https://example.com/something/asdf.git', None),
        ('https://github.com/lowRISC/ibex.git', ('lowRISC', 'ibex')),
        ('https://github.com/lowRISC/ibex', ('lowRISC', 'ibex')),
        ('git@github.com:lowRISC/ibex.git', ('lowRISC', 'ibex')),
    ]
    for url, expected in cases:
        parsed = github_parse_url(url)
        if expected is None:
            assert parsed is None
        else:
            assert parsed == expected
136
137
def github_parse_url(github_repo_url):
    """Parse a GitHub repository URL into its parts.

    Both HTTPS-style (.../github.com/userorg/name) and SSH-style
    (...@github.com:userorg/name) URLs are recognised. A trailing ".git"
    suffix and a single trailing slash are ignored.

    Return a tuple (userorg, name), or None if the parsing failed.
    """

    # The user/org part only allows alphanumerics and '-'. Repository names
    # may additionally contain '_' and '.', so the name is matched lazily to
    # leave an optional '.git' suffix out of the captured group.
    regex = (r"(?:@github\.com\:|\/github\.com\/)"
             r"([a-zA-Z\d-]+)\/([\w.-]+?)(?:\.git)?\/?$")
    m = re.search(regex, github_repo_url)
    if m is None:
        return None
    return (m.group(1), m.group(2))
149
150
def produce_shortlog(clone_dir, mapping, old_rev, new_rev):
    """ Produce a list of changes between two revisions, one revision per line

    clone_dir is the git checkout to inspect. mapping is either None or an
    object whose 'items' each have a 'from_path' relative to clone_dir.
    old_rev and new_rev delimit the revision range (old_rev..new_rev).

    Returns a list of strings of the form "subject (author)". Merges are
    excluded. If git fails, the error is logged and an empty list is
    returned."""

    # If mapping is None, we want to list all changes below clone_dir.
    # Otherwise, we want to list changes in each 'source' in the mapping. Since
    # these strings are paths relative to clone_dir, we can just pass them all
    # to git and let it figure out what to do.
    if mapping is None:
        subdirs = ['.']
    else:
        subdirs = [str(m.from_path) for m in mapping.items]

    # "git -C" already makes git operate in clone_dir, so no cwd is passed to
    # subprocess (doing both would apply a relative clone_dir twice). The
    # '--' separates the revision range from the path arguments so that a
    # path can never be mistaken for a revision.
    cmd = (['git', '-C', str(clone_dir), 'log',
            '--pretty=format:%s (%aN)', '--no-merges',
            old_rev + '..' + new_rev, '--'] +
           subdirs)
    try:
        proc = subprocess.run(cmd,
                              check=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True)
    except subprocess.CalledProcessError as e:
        log.error("Unable to capture shortlog: %s", e.stderr)
        # Keep the return type consistent with the success path.
        return []
    return proc.stdout.splitlines()
178
179
def format_list_to_str(list, width=70):
    """ Create Markdown-style formatted string from a list of strings

    Each entry becomes a "* " bullet, wrapped to width columns. Wrapped
    continuation lines are indented by two spaces so that they line up with
    the text of the bullet's first line. Entries are joined with newlines; an
    empty input gives an empty string.
    """
    wrapper = textwrap.TextWrapper(initial_indent="* ",
                                   subsequent_indent="  ",
                                   width=width)
    return '\n'.join(wrapper.fill(s) for s in list)
186
187
class JsonError(Exception):
    '''An error class for when data in the source HJSON is bad

    path is the location of the offending HJSON file and msg describes the
    problem. Both are available as attributes and are also stored in the
    exception's args, so that repr() and pickling behave as for any other
    exception.
    '''
    def __init__(self, path, msg):
        super().__init__(path, msg)
        self.path = path
        self.msg = msg

    def __str__(self):
        return 'In hjson at {}, {}'.format(self.path, self.msg)
196
197
def get_field(path, where, data, name, expected_type=dict, optional=False, constructor=None):
    '''Fetch and type-check the field called name from the dict data

    path and where are only used to build error messages. A field that is
    absent (or whose value is None) yields None if optional is true and
    raises JsonError otherwise. A value that is not an instance of
    expected_type raises JsonError. If constructor is given, it is applied to
    the checked value and its result is returned instead of the raw value.
    '''
    raw = data.get(name)

    if raw is None:
        if optional:
            return None
        raise JsonError(path, '{}, missing {!r} field.'.format(where, name))

    if not isinstance(raw, expected_type):
        raise JsonError(path,
                        '{}, the {!r} field is {!r}, but should be of type {!r}.'
                        .format(where, name, raw, expected_type.__name__))

    if constructor is None:
        return raw
    return constructor(raw)
211
212
class Upstream:
    '''The 'upstream' field of a config or lock file

    Holds the string fields 'url' and 'rev', plus the optional string field
    'only_subdir' (None when not given).
    '''
    def __init__(self, path, data):
        context = 'in upstream dict'

        def string_field(name, optional=False):
            return get_field(path, context, data, name, str, optional=optional)

        self.url = string_field('url')
        self.rev = string_field('rev')
        self.only_subdir = string_field('only_subdir', optional=True)

    def as_dict(self):
        '''Return the fields as a plain dict, omitting only_subdir if unset'''
        if self.only_subdir is None:
            return {'url': self.url, 'rev': self.rev}
        return {'url': self.url,
                'rev': self.rev,
                'only_subdir': self.only_subdir}
228
229
class PatchRepo:
    '''The 'patch_repo' field of a config file

    Holds the three required string fields 'url', 'rev_base' and
    'rev_patched'.
    '''
    def __init__(self, path, data):
        context = 'in patch_repo dict'

        def required_string(name):
            return get_field(path, context, data, name, str)

        self.url = required_string('url')
        self.rev_base = required_string('rev_base')
        self.rev_patched = required_string('rev_patched')
238
239
class Mapping1:
    '''A class to represent a single item in the 'mapping' field in a config file

    from_path is relative to the upstream checkout, to_path is relative to
    the target directory and patch_dir (which may be None) is relative to the
    global patch directory.
    '''
    def __init__(self, from_path, to_path, patch_dir):
        self.from_path = from_path
        self.to_path = to_path
        self.patch_dir = patch_dir

    @staticmethod
    def _is_safe_relpath(normalized):
        '''Check that an already normalised path is relative and contained

        Returns False for absolute paths and for paths whose first component
        is '..' (that is, paths that escape the directory they are resolved
        against). Names that merely begin with two dots, such as '..foo', are
        fine.
        '''
        if os.path.isabs(normalized) or normalized.startswith('/'):
            return False
        return Path(normalized).parts[:1] != ('..',)

    @staticmethod
    def make(path, idx, data):
        '''Build a Mapping1 from entry number idx (zero-based) of the mapping

        path is the config file (for error messages) and data is the dict for
        this entry. Raises JsonError if a field is missing, has the wrong
        type or names a path that is absolute or escapes its directory.
        '''
        assert isinstance(data, dict)

        def get_path(name, optional=False):
            val = get_field(path, 'in mapping entry {}'.format(idx + 1),
                            data, name, expected_type=str, optional=optional)
            if val is None:
                return None

            # Check that the paths aren't evil ('../../../foo' or '/etc/passwd'
            # are *not* ok!). Normalising first collapses things like
            # 'a/../../b' so that only a leading '..' needs to be checked.
            val = os.path.normpath(val)
            if not Mapping1._is_safe_relpath(val):
                raise JsonError(path,
                                'Mapping entry {} has a bad path for {!r} '
                                '(must be a relative path that doesn\'t '
                                'escape the directory)'
                                .format(idx + 1, name))

            return Path(val)

        from_path = get_path('from')
        to_path = get_path('to')
        patch_dir = get_path('patch_dir', optional=True)

        return Mapping1(from_path, to_path, patch_dir)

    @staticmethod
    def make_default(have_patch_dir):
        '''Make a default mapping1, which copies everything straight through'''
        return Mapping1(Path('.'), Path('.'),
                        Path('.') if have_patch_dir else None)

    @staticmethod
    def apply_patch(basedir, patchfile):
        '''Apply patchfile with "git apply", prefixing paths with basedir

        Raises subprocess.CalledProcessError if git fails.
        '''
        cmd = ['git', 'apply', '--directory', str(basedir), '-p1',
               str(patchfile)]
        if verbose:
            cmd += ['--verbose']
        subprocess.run(cmd, check=True)

    def import_from_upstream(self, upstream_path,
                             target_path, exclude_files, patch_dir):
        '''Copy from the upstream checkout to target_path

        exclude_files is a list of fnmatch patterns (relative to
        upstream_path) for files that should not be copied. patch_dir is the
        global patch directory; it is only used if this item has a patch_dir
        of its own.
        '''
        from_path = upstream_path / self.from_path
        to_path = target_path / self.to_path

        # Make sure the target directory actually exists
        to_path.parent.mkdir(exist_ok=True, parents=True)

        # Copy src to dst recursively. For directories, we can use
        # shutil.copytree. This doesn't support files, though, so we have to
        # check for them first.
        if from_path.is_file():
            shutil.copy(str(from_path), str(to_path))
        else:
            ignore = ignore_patterns(str(upstream_path), *exclude_files)
            shutil.copytree(str(from_path), str(to_path), ignore=ignore)

        # Apply any patches to the copied files. If self.patch_dir is None,
        # there are none to apply. Otherwise, resolve it relative to patch_dir.
        if self.patch_dir is not None:
            patches = (patch_dir / self.patch_dir).glob('*.patch')
            # Sort so that patches are applied in a deterministic order.
            for patch in sorted(patches):
                log.info("Applying patch {} at {}".format(patch, to_path))
                Mapping1.apply_patch(to_path, patch)
314
315
class Mapping:
    '''The 'mapping' field of a config file

    In the config file this is a list of dicts; here it is held as a list of
    Mapping1 objects in self.items.
    '''
    def __init__(self, items):
        self.items = items

    @staticmethod
    def make(path, data):
        '''Build a Mapping from the list data, read from the file at path

        Raises JsonError if an element of data is not a dict.
        '''
        assert isinstance(data, list)

        entries = []
        for position, raw_entry in enumerate(data):
            if isinstance(raw_entry, dict):
                entries.append(Mapping1.make(path, position, raw_entry))
                continue
            raise JsonError(path, 'Mapping element {!r} is not a dict.'.format(raw_entry))

        return Mapping(entries)

    def has_patch_dir(self):
        '''Check whether at least one item defines a patch dir'''
        return any(entry.patch_dir is not None for entry in self.items)
341
342
class LockDesc:
    '''The contents of a lock file

    handle is an open file-like object with a 'name' attribute; its HJSON
    contents must have a top-level 'upstream' dict.
    '''
    def __init__(self, handle):
        contents = hjson.loads(handle.read(), use_decimal=True)

        def make_upstream(upstream_data):
            return Upstream(handle.name, upstream_data)

        self.upstream = get_field(handle.name, 'at top-level', contents,
                                  'upstream', constructor=make_upstream)
349
350
class Desc:
    '''A class representing the configuration file'''

    def __init__(self, handle, desc_overrides):
        '''Load and validate the description file open at handle

        handle is an open file-like object with a 'name' attribute.
        desc_overrides is an iterable of (dotted_key, value) pairs that are
        applied to the loaded data before it is validated.

        Raises ValueError if the file is not named <something>.vendor.hjson
        and JsonError if the contents are not well-formed.
        '''

        # Ensure description file matches our naming rules (otherwise we don't
        # know the name for the lockfile). The check is done on the file name
        # only (not its directory) and must match all of it, so that we
        # really do have the right suffix and a nonempty name in front of it.
        if not re.fullmatch(r'.+\.vendor\.hjson', Path(str(handle.name)).name):
            raise ValueError("Description file names must have a .vendor.hjson suffix.")

        data = hjson.loads(handle.read(), use_decimal=True)
        where = 'at top-level'

        self.apply_overrides(data, desc_overrides)

        path = Path(handle.name)

        def take_path(p):
            # Paths in the description file are relative to the file itself.
            return path.parent / p

        self.path = path
        self.name = get_field(path, where, data, 'name', expected_type=str)
        self.target_dir = get_field(path, where, data, 'target_dir',
                                    expected_type=str, constructor=take_path)
        self.upstream = get_field(path, where, data, 'upstream',
                                  constructor=lambda data: Upstream(path, data))
        self.patch_dir = get_field(path, where, data, 'patch_dir',
                                   optional=True, expected_type=str, constructor=take_path)
        self.patch_repo = get_field(path, where, data, 'patch_repo',
                                    optional=True,
                                    constructor=lambda data: PatchRepo(path, data))
        self.exclude_from_upstream = (get_field(path, where, data, 'exclude_from_upstream',
                                                optional=True, expected_type=list) or
                                      [])
        self.mapping = get_field(path, where, data, 'mapping', optional=True,
                                 expected_type=list,
                                 constructor=lambda data: Mapping.make(path, data))

        # Add default exclusions
        self.exclude_from_upstream += EXCLUDE_ALWAYS

        # It doesn't make sense to define a patch_repo, but not a patch_dir
        # (where should we put the patches that we get?)
        if self.patch_repo is not None and self.patch_dir is None:
            raise JsonError(path, 'Has patch_repo but not patch_dir.')

        # We don't currently support a patch_repo and a mapping (just because
        # we haven't written the code to generate the patches across subdirs
        # yet). Tracked in issue #2317.
        if self.patch_repo is not None and self.mapping is not None:
            raise JsonError(path,
                            "vendor.py doesn't currently support patch_repo "
                            "and mapping at the same time (see issue #2317).")

        # If a patch_dir is defined and there is no mapping, we will look in
        # that directory for patches and apply them in (the only) directory
        # that we copy stuff into.
        #
        # If there is a mapping, check that there is a patch_dir if and only
        # if at least one mapping entry uses it.
        if self.mapping is not None:
            if self.patch_dir is not None:
                if not self.mapping.has_patch_dir():
                    raise JsonError(path, 'Has patch_dir, but no mapping item uses it.')
            else:
                if self.mapping.has_patch_dir():
                    raise JsonError(path,
                                    'Has a mapping item with a patch directory, '
                                    'but there is no global patch_dir key.')

        # Check that exclude_from_upstream really is a list of strings. Most of
        # this type-checking is in the constructors for field types, but we
        # don't have a "ExcludeList" class, so have to do it explicitly here.
        for efu in self.exclude_from_upstream:
            if not isinstance(efu, str):
                raise JsonError(path,
                                'exclude_from_upstream has entry {}, which is not a string.'
                                .format(efu))

    def apply_overrides(self, desc_data, desc_overrides):
        """ Apply overrides from command line to configuration file data

        desc_overrides is an iterable of (key, value) pairs, where key is a
        dotted path such as 'upstream.url'. Intermediate dicts are created if
        they are missing.

        Updates are applied to the desc_data reference."""

        for key, value in desc_overrides:
            log.info("Overriding description key {!r} with value {!r}".format(
                key, value))
            ref = desc_data
            split_keys = key.split('.')
            # Walk down to the dict that holds the final key component.
            for key_part in split_keys[:-1]:
                if key_part not in ref:
                    ref[key_part] = {}
                ref = ref[key_part]
            ref[split_keys[-1]] = value

    def lock_file_path(self):
        '''Return the path of the lock file that goes with this description

        foo.vendor.hjson is paired with foo.lock.hjson in the same directory.
        '''
        desc_file_stem = self.path.name.rsplit('.', 2)[0]
        return self.path.with_name(desc_file_stem + '.lock.hjson')

    def import_from_upstream(self, upstream_path):
        '''Replace target_dir with a fresh copy from the upstream checkout

        upstream_path is the root of the checkout to copy from. Each mapping
        item (or a single default item if there is no mapping) is copied and
        then patched.
        '''
        log.info('Copying upstream sources to {}'.format(self.target_dir))

        # Remove existing directories before importing them again
        shutil.rmtree(str(self.target_dir), ignore_errors=True)

        items = (self.mapping.items if self.mapping is not None
                 else [Mapping1.make_default(self.patch_dir is not None)])
        for map1 in items:
            map1.import_from_upstream(upstream_path,
                                      self.target_dir,
                                      self.exclude_from_upstream,
                                      self.patch_dir)
464
Rupert Swarbrick643e7c02020-05-13 14:18:43 +0100465
def refresh_patches(desc):
    '''Regenerate the patches in desc.patch_dir from desc.patch_repo

    Exits the program with status 1 if desc has no patch_repo. Otherwise all
    existing *.patch files in desc.patch_dir are deleted and a new set is
    exported from the patch repository.
    '''
    repo = desc.patch_repo
    if repo is None:
        log.fatal('Unable to refresh patches, patch_repo not set in config.')
        sys.exit(1)

    patch_dir = desc.patch_dir
    log.info('Refreshing patches in {}'.format(patch_dir))

    # remove existing patches
    for stale_patch in patch_dir.glob('*.patch'):
        os.unlink(str(stale_patch))

    # get current patches
    _export_patches(repo.url, patch_dir, repo.rev_base, repo.rev_patched)
lowRISC Contributors802543a2019-08-31 12:12:56 +0100481
482
def _export_patches(patchrepo_clone_url, target_patch_dir, upstream_rev,
                    patched_rev):
    '''Write patches for origin/upstream_rev..origin/patched_rev

    The patch repository at patchrepo_clone_url is cloned into a temporary
    directory and "git format-patch" is run there, writing its output to
    target_patch_dir. Raises subprocess.CalledProcessError if git fails.
    '''
    with tempfile.TemporaryDirectory() as clone_dir:
        clone_git_repo(patchrepo_clone_url, clone_dir, patched_rev)

        rev_range = '..'.join(['origin/' + upstream_rev,
                               'origin/' + patched_rev])
        # The output directory is made absolute because git runs with the
        # temporary clone as its working directory.
        output_dir = str(target_patch_dir.resolve())

        cmd = ['git', 'format-patch', '--no-signature', '--no-stat',
               '-o', output_dir, rev_range]
        if not verbose:
            cmd.append('-q')
        subprocess.run(cmd, cwd=str(clone_dir), check=True)
lowRISC Contributors802543a2019-08-31 12:12:56 +0100500
lowRISC Contributors802543a2019-08-31 12:12:56 +0100501
def ignore_patterns(base_dir, *patterns):
    """Similar to shutil.ignore_patterns, but with support for directory excludes.

    Returns a callable suitable for the 'ignore' argument of shutil.copytree.
    Unlike shutil.ignore_patterns, each pattern is matched (with fnmatch)
    against the path of an entry relative to base_dir, not just against its
    name.
    """
    def _ignore_patterns(path, names):
        ignored = set()
        for name in names:
            rel_name = os.path.relpath(os.path.join(path, name), base_dir)
            if any(fnmatch.fnmatch(rel_name, pattern) for pattern in patterns):
                ignored.add(name)
        return ignored

    return _ignore_patterns
lowRISC Contributors802543a2019-08-31 12:12:56 +0100518
519
def clone_git_repo(repo_url, clone_dir, rev='master'):
    '''Clone repo_url into clone_dir and check out rev

    Returns the full hash that HEAD points to after the checkout (as printed
    by "git rev-parse HEAD"). Raises subprocess.CalledProcessError if any
    git command fails.
    '''
    log.info('Cloning upstream repository %s @ %s', repo_url, rev)

    clone_path = str(clone_dir)
    quiet_flag = [] if verbose else ['-q']

    # Clone the whole repository
    subprocess.run(['git', 'clone', '--no-single-branch'] + quiet_flag +
                   [repo_url, clone_path],
                   check=True)

    # Check out exactly the revision requested
    subprocess.run(['git', '-C', clone_path, 'checkout', '--force', rev] +
                   quiet_flag,
                   check=True)

    # Get revision information
    rev_parse = subprocess.run(['git', '-C', clone_path, 'rev-parse', 'HEAD'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               check=True,
                               universal_newlines=True)
    head_rev = rev_parse.stdout.strip()
    log.info('Cloned at revision %s', head_rev)
    return head_rev
545
546
def git_get_short_rev(clone_dir, rev):
    """ Get the shortened SHA-1 hash for a revision

    Runs "git rev-parse --short" in clone_dir and returns its output with
    surrounding whitespace removed. Raises subprocess.CalledProcessError if
    git fails.
    """
    result = subprocess.run(
        ['git', '-C', str(clone_dir), 'rev-parse', '--short', rev],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
        universal_newlines=True)
    return result.stdout.strip()
556
557
def git_add_commit(paths, commit_msg):
    """ Stage and commit all changes in paths

    paths is a non-empty list of pathlib.Path objects and commit_msg is the
    complete commit message. The commit is created with a Signed-off-by line
    (-s).

    A failure to stage raises subprocess.CalledProcessError. A failure to
    commit (for example because there is nothing to commit) only logs a
    warning.
    """

    assert paths
    # All command line arguments are converted to strings explicitly, as is
    # done for every other git invocation in this file: subprocess only
    # accepts path-like objects in the argument list on some Python versions
    # and platforms.
    base_dir = str(paths[0].parent)

    # Stage all changes
    #
    # Rather than figuring out GIT_DIR properly, we cheat and use "git -C" to
    # pretend that we're running in base_dir. Of course, the elements of paths
    # are relative to our actual working directory. Rather than do anything
    # clever, we just resolve them to absolute paths as we go.
    abs_paths = [str(p.resolve()) for p in paths]
    subprocess.run(['git', '-C', base_dir, 'add'] + abs_paths, check=True)

    # The commit message is passed on stdin ("-F -").
    cmd_commit = ['git', '-C', base_dir, 'commit', '-s', '-F', '-']
    try:
        subprocess.run(cmd_commit,
                       check=True,
                       universal_newlines=True,
                       input=commit_msg)
    except subprocess.CalledProcessError:
        log.warning("Unable to create commit. Are there no changes?")
581
582
def define_arg_type(arg):
    """Sanity-check and return a config file override argument

    arg must have the form key=value. Only the first '=' separates the key
    from the value, so the value may itself contain '=' characters (as URLs
    with query strings do). Whitespace around the key and the value is
    removed.

    Returns the tuple (key, value). Raises argparse.ArgumentTypeError if arg
    has no '=' or if key is not listed in OVERRIDABLE_DESC_KEYS.
    """
    key, separator, value = arg.partition('=')
    if not separator:
        raise argparse.ArgumentTypeError(
            'unable to parse {!r}: configuration overrides must be in the form key=value'
            .format(arg))

    key = key.strip()
    value = value.strip()

    if key not in OVERRIDABLE_DESC_KEYS:
        raise argparse.ArgumentTypeError(
            'invalid configuration override: key {!r} cannot be overwritten'
            .format(key))
    return (key, value)
597
598
def main(argv):
    '''Run the vendor tool

    argv is the complete command line, including the program name in
    argv[0] (pass sys.argv for normal use). If argv is None, the arguments
    are taken from sys.argv.

    Exits through SystemExit on invalid input or configuration. Failing git
    commands raise subprocess.CalledProcessError.
    '''
    parser = argparse.ArgumentParser(prog="vendor", description=__doc__)
    parser.add_argument(
        '--update',
        '-U',
        dest='update',
        action='store_true',
        help='Update locked version of repository with upstream changes')
    parser.add_argument('--refresh-patches',
                        action='store_true',
                        help='Refresh the patches from the patch repository')
    parser.add_argument('--commit',
                        '-c',
                        action='store_true',
                        help='Commit the changes')
    parser.add_argument('--desc-override',
                        '-D',
                        dest="desc_overrides",
                        action="append",
                        type=define_arg_type,
                        default=[],
                        help='Override a setting in the description file. '
                        'Format: -Dsome.key=value. '
                        'Can be used multiple times.')
    parser.add_argument('desc_file',
                        metavar='file',
                        type=argparse.FileType('r', encoding='UTF-8'),
                        help='vendoring description file (*.vendor.hjson)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose')
    # Parse the arguments we were given rather than silently falling back to
    # sys.argv. argv[0] is the program name and is not an argument.
    args = parser.parse_args(None if argv is None else argv[1:])

    global verbose
    verbose = args.verbose
    if verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")

    # Load input files (desc file; lock file) and check syntax etc.
    try:
        # Load description file
        desc = Desc(args.desc_file, args.desc_overrides)
        lock_file_path = desc.lock_file_path()

        # Try to load lock file (which might not exist). It is written as
        # UTF-8 below, so read it back with the same encoding.
        try:
            with open(str(lock_file_path), 'r', encoding='UTF-8') as lock_file:
                lock = LockDesc(lock_file)
        except FileNotFoundError:
            lock = None
    except (JsonError, ValueError) as err:
        log.fatal(str(err))
        raise SystemExit(1)

    # Check for a clean working directory when commit is requested
    if args.commit:
        if not git_is_clean_workdir(desc.path.parent):
            log.fatal("A clean git working directory is required for "
                      "--commit/-c. git stash your changes and try again.")
            raise SystemExit(1)

    if lock is None and not args.update:
        log.warning("No lock file at {}, so will update upstream repo."
                    .format(str(desc.lock_file_path())))
        args.update = True

    # If we have a lock file and we're not in update mode, override desc's
    # upstream field with the one from the lock file. Keep track of whether the
    # URL differs (in which case, we can't get a shortlog)
    changed_url = False
    if lock is not None:
        changed_url = desc.upstream.url != lock.upstream.url
        if not args.update:
            desc.upstream = lock.upstream

    if args.refresh_patches:
        refresh_patches(desc)

    with tempfile.TemporaryDirectory() as clone_dir:
        # clone upstream repository
        upstream_new_rev = clone_git_repo(desc.upstream.url, clone_dir,
                                          rev=desc.upstream.rev)

        # Outside update mode a lock file always exists (see above), and the
        # checkout must give exactly the revision recorded in it.
        if not args.update:
            if upstream_new_rev != lock.upstream.rev:
                log.fatal(
                    "Revision mismatch. Unable to re-clone locked version of repository."
                )
                log.fatal("Attempted revision: %s", desc.upstream.rev)
                log.fatal("Re-cloned revision: %s", upstream_new_rev)
                raise SystemExit(1)

        clone_subdir = Path(clone_dir)
        if desc.upstream.only_subdir is not None:
            clone_subdir = clone_subdir / desc.upstream.only_subdir
            if not clone_subdir.is_dir():
                log.fatal("subdir '{}' does not exist in repo"
                          .format(desc.upstream.only_subdir))
                raise SystemExit(1)

        # copy selected files from upstream repo and apply patches as necessary
        desc.import_from_upstream(clone_subdir)

        # get shortlog
        get_shortlog = args.update
        if args.update:
            if lock is None:
                get_shortlog = False
                log.warning("No lock file %s: unable to summarize changes.", str(lock_file_path))
            elif changed_url:
                get_shortlog = False
                log.warning("The repository URL changed since the last run. "
                            "Unable to get log of changes.")

        shortlog = None
        if get_shortlog:
            shortlog = produce_shortlog(clone_subdir, desc.mapping,
                                        lock.upstream.rev, upstream_new_rev)

            # Ensure fully-qualified issue/PR references for GitHub repos
            gh_repo_info = github_parse_url(desc.upstream.url)
            if gh_repo_info:
                shortlog = github_qualify_references(shortlog, gh_repo_info[0],
                                                     gh_repo_info[1])

            log.info("Changes since the last import:\n" +
                     format_list_to_str(shortlog))

        # write lock file
        if args.update:
            lock_data = {}
            lock_data['upstream'] = desc.upstream.as_dict()
            # Record the exact revision that was checked out, not the
            # (possibly symbolic) one from the description file.
            lock_data['upstream']['rev'] = upstream_new_rev
            with open(str(lock_file_path), 'w', encoding='UTF-8') as f:
                f.write(LOCK_FILE_HEADER)
                hjson.dump(lock_data, f)
                f.write("\n")
                log.info("Wrote lock file %s", str(lock_file_path))

        # Commit changes
        if args.commit:
            sha_short = git_get_short_rev(clone_subdir, upstream_new_rev)

            repo_info = github_parse_url(desc.upstream.url)
            if repo_info is not None:
                sha_short = "%s/%s@%s" % (repo_info[0], repo_info[1],
                                          sha_short)

            commit_msg_subject = 'Update %s to %s' % (desc.name, sha_short)
            intro = ('Update code from {}upstream repository {} to revision {}'
                     .format(('' if desc.upstream.only_subdir is None else
                              'subdir {} in '.format(desc.upstream.only_subdir)),
                             desc.upstream.url,
                             upstream_new_rev))
            commit_msg_body = textwrap.fill(intro, width=70)

            if shortlog:
                commit_msg_body += "\n\n"
                commit_msg_body += format_list_to_str(shortlog, width=70)

            commit_msg = commit_msg_subject + "\n\n" + commit_msg_body

            commit_paths = []
            commit_paths.append(desc.target_dir)
            if args.refresh_patches:
                commit_paths.append(desc.patch_dir)
            commit_paths.append(lock_file_path)

            git_add_commit(commit_paths, commit_msg)

    log.info('Import finished')
769
770
if __name__ == '__main__':
    try:
        main(sys.argv)
    except subprocess.CalledProcessError as e:
        # e.cmd is either a single string or a sequence whose items are not
        # necessarily strings (they may be path objects), so convert each
        # item before joining. Otherwise a TypeError raised here would hide
        # the real failure.
        if isinstance(e.cmd, str):
            called = e.cmd
        else:
            called = " ".join(str(part) for part in e.cmd)
        log.fatal("Called program '%s' returned with %d.\n"
                  "STDOUT:\n%s\n"
                  "STDERR:\n%s\n",
                  called, e.returncode, e.stdout, e.stderr)
        raise
    except KeyboardInterrupt:
        log.info("Aborting operation on user request.")
        sys.exit(1)
782 sys.exit(1)