[util] get-toolchain.py: Support JSON buildinfo files

Current toolchain builds from
https://github.com/lowRISC/lowrisc-toolchains contain a human-readable
"buildinfo" file with the version of the build, among other things.
Since https://github.com/lowRISC/lowrisc-toolchains/pull/6 the toolchain
builds also contain a buildinfo.json file with a superset of this
information in JSON, which is much better to read from a script.

This commit updates the get-toolchain.py script to make use of this new
JSON build information, if available, and fall back to the old plaintext
file otherwise.

To help the implementation of that feature I also did the following
cleanups:

- (Behavior) Remove the `--force` option, and imply it with `--update`
  if an existing toolchain installation was detected; bail out with an
  error if the target path doesn't contain a toolchain yet. This gives
  us roughly the same safety net as before, while simplifying the
  logic.
- (Fix) Fix buildinfo regex to work with all known buildinfo files.
- (Refactoring) Use pathlib.Path consistently.
- (Refactoring) Shuffle around functions to get build information, and
  improve their naming.

Overall we get a slight decrease in lines-of-code and more
functionality.

Signed-off-by: Philipp Wagner <phw@lowrisc.org>
diff --git a/util/get-toolchain.py b/util/get-toolchain.py
index 1276294..73d82e3 100755
--- a/util/get-toolchain.py
+++ b/util/get-toolchain.py
@@ -5,164 +5,151 @@
 
 import argparse
 import json
-import os
-from pathlib import Path
+import logging as log
 import re
-import subprocess
 import shutil
+import subprocess
 import sys
 import tempfile
-import time
+from pathlib import Path
 from urllib.request import urlopen, urlretrieve
 
+log.basicConfig(level=log.INFO, format="%(levelname)s: %(message)s")
+
 ASSET_PREFIX = "lowrisc-toolchain-gcc-rv32imc-"
 ASSET_SUFFIX = ".tar.xz"
-BUILDINFO_FILENAME = "buildinfo"
 RELEASES_URL_BASE = 'https://api.github.com/repos/lowRISC/lowrisc-toolchains/releases'
+
 TARGET_DIR = '/tools/riscv'
 TOOLCHAIN_VERSION = 'latest'
-VERSION_RE=r"(lowRISC toolchain version|Version):\s*\n(?P<version>\d+(-\d+)?)"
 
 
-def prompt_yes_no(msg):
-    while True:
-        print(msg, end=" ")
-        response = input().lower()
-        if response in ('yes', 'y'):
-            return True
-        elif response in ('no', 'n'):
-            return False
-        else:
-            print('Invalid response. Valid options are "yes" or "no"')
-
-
-def get_release_info(toolchain_version):
-    if toolchain_version == 'latest':
-        releases_url = '%s/%s' % (RELEASES_URL_BASE, toolchain_version)
+def get_available_toolchain_info(version):
+    if version == 'latest':
+        releases_url = '%s/%s' % (RELEASES_URL_BASE, version)
     else:
-        releases_url = '%s/tags/%s' % (RELEASES_URL_BASE, toolchain_version)
+        releases_url = '%s/tags/%s' % (RELEASES_URL_BASE, version)
+
     with urlopen(releases_url) as f:
-        return json.loads(f.read().decode('utf-8'))
+        release_info = json.loads(f.read().decode('utf-8'))
 
-
-def get_download_url(release_info):
-    return [
+    download_url = [
         a["browser_download_url"] for a in release_info["assets"]
         if (a["name"].startswith(ASSET_PREFIX) and
             a["name"].endswith(ASSET_SUFFIX))
     ][0]
 
-
-def get_release_tag(release_info):
-    return release_info["tag_name"]
+    return {'download_url': download_url, 'version': release_info['tag_name']}
 
 
-def get_release_tag_from_file(buildinfo_file):
-    """Extracts version tag from buildinfo file.
+def get_installed_toolchain_info(install_path):
 
-    Args:
-        buildinfo_file: Path to buildinfo_file.
-    Returns:
-        Release tag string if available, otherwise None.
-    """
-    with open(buildinfo_file, 'r') as f:
-        match = re.match(VERSION_RE, f.read(), re.M)
-    if not match:
+    # Try new-style buildinfo.json first
+    try:
+        buildinfo = {}
+        with open(str(install_path / 'buildinfo.json'), 'r') as f:
+            buildinfo = json.loads(f.read())
+        return buildinfo
+    except Exception as e:
+        # buildinfo.json might not exist in older builds
+        log.info("Unable to parse buildinfo.json: %s", str(e))
+        pass
+
+    # If that wasn't successful, try old-style plaintext buildinfo
+    version_re = r"(lowRISC toolchain version|Version):\s*\n?(?P<version>[^\n\s]+)"
+    buildinfo_txt_path = install_path / 'buildinfo'
+    try:
+        with open(str(buildinfo_txt_path), 'r') as f:
+            match = re.match(version_re, f.read(), re.M)
+        if not match:
+            log.warning("Unable extract version from %s",
+                        str(buildinfo_txt_path))
+            return None
+        return {'version': match.group("version")}
+    except Exception as e:
+        log.error("Unable to read %s: %s", str(buildinfo_txt_path), str(e))
         return None
-    return match.group("version")
 
 
 def download(url):
-    print("Downloading toolchain from %s" % (url, ))
-    tmpfile = tempfile.mktemp()
+    log.info("Downloading toolchain from %s", url)
+    tmpfile = tempfile.mkstemp()[1]
     urlretrieve(url, tmpfile)
-    return tmpfile
+    return Path(tmpfile)
 
 
 def install(archive_file, target_dir):
-    os.makedirs(target_dir)
+    target_dir.mkdir(parents=True, exist_ok=True)
 
     cmd = [
-        'tar', '-x', '-f', archive_file, '--strip-components=1', '-C',
-        target_dir
+        'tar', '-x', '-f', str(archive_file), '--strip-components=1', '-C',
+        str(target_dir)
     ]
     subprocess.run(cmd, check=True)
 
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--target-dir',
-        '-t',
-        required=False,
-        default=TARGET_DIR,
-        help="Target directory (must not exist) (default: %(default)s)")
-    parser.add_argument(
-        '--release-version',
-        '-r',
-        required=False,
-        default=TOOLCHAIN_VERSION,
-        help="Toolchain version (default: %(default)s)")
+    parser.add_argument('--target-dir',
+                        '-t',
+                        required=False,
+                        default=TARGET_DIR,
+                        help="Target directory (default: %(default)s)")
+    parser.add_argument('--release-version',
+                        '-r',
+                        required=False,
+                        default=TOOLCHAIN_VERSION,
+                        help="Toolchain version (default: %(default)s)")
     parser.add_argument(
         '--update',
         '-u',
         required=False,
         default=False,
         action='store_true',
-        help="Set to update to target version if needed (default: %(default)s)")
-    parser.add_argument(
-        '--force',
-        '-f',
-        required=False,
-        default=False,
-        action='store_true',
-        help="Set to skip directory erase prompt when --update is set "
-            "(default: %(default)s)")
+        help="Update to target version if needed (default: %(default)s)")
     args = parser.parse_args()
 
-    target_dir = args.target_dir
-    toolchain_version = args.release_version
+    target_dir = Path(args.target_dir)
 
-    if not args.update and os.path.exists(args.target_dir):
-        sys.exit('Target directory %s already exists. Delete it first '
-             'it you want to re-download the toolchain.' % (target_dir, ))
+    available_toolchain = get_available_toolchain_info(args.release_version)
 
-    release_info = get_release_info(toolchain_version)
+    if args.update and target_dir.is_dir():
+        installed_toolchain = get_installed_toolchain_info(target_dir)
+        if installed_toolchain is None:
+            sys.exit('Unable to extract current toolchain version. '
+                     'Delete target directory and try again.')
 
-    if args.update and os.path.exists(args.target_dir):
-        buildinfo_file = str(Path(target_dir) / BUILDINFO_FILENAME)
-        if not os.path.exists(buildinfo_file):
-            sys.exit('Unable to find buildinfo file at %s. Delete target '
-                'directory and try again.' % buildinfo_file)
-        current_release_tag = get_release_tag_from_file(buildinfo_file)
-        if not current_release_tag and not args.force:
-            # If args.force is set then we can skip this error condition. The
-            # version check test condition will also fail, and the install
-            # will continue.
-            sys.exit('Unable to extract current toolchain version from %s. '
-                'Delete target directory and try again.' % buildinfo_file)
-        if get_release_tag(release_info) == current_release_tag:
-            print('Toolchain version %s already installed at %s. Skipping '
-                'install.' % (current_release_tag, target_dir))
-            return
+        if available_toolchain['version'] == installed_toolchain['version']:
+            log.info(
+                'Toolchain version %s already installed at %s. Skipping '
+                'install.', installed_toolchain['version'], str(target_dir))
+            sys.exit(0)
 
-    download_url = get_download_url(release_info)
+        log.info(
+            "Found installed toolchain version %s, updating to version %s.",
+            installed_toolchain['version'], available_toolchain['version'])
+    else:
+        if target_dir.exists():
+            sys.exit(
+                'Target directory %s already exists. Delete it first, or use --update.'
+                % str(target_dir))
+
+    archive_file = None
     try:
-        archive_file = download(download_url)
-        if args.update and os.path.exists(args.target_dir):
-            # Warn the user before deleting the target directory.
-            warning_msg = 'WARNING: Removing directory: %s.' % target_dir
-            if not args.force:
-                if not prompt_yes_no(warning_msg + ' Continue [yes/no]:'):
-                    sys.exit('Aborting update.')
-            else:
-                print(warning_msg)
+        archive_file = download(available_toolchain['download_url'])
+
+        if args.update and target_dir.exists():
+            # We only reach this point if |target_dir| contained a toolchain
+            # before, so removing it is reasonably safe.
             shutil.rmtree(target_dir)
+
         install(archive_file, target_dir)
     finally:
-        os.remove(archive_file)
+        if archive_file:
+            archive_file.unlink()
 
-    print('Toolchain downloaded and installed to %s' % (target_dir, ))
+    log.info('Toolchain version %s downloaded and installed to %s.',
+             available_toolchain['version'], str(target_dir))
 
 
 if __name__ == "__main__":