blob: 020afd5e75b79cb123f17f2d26060654c42683c4 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2021, NVIDIA Corporation
# SPDX-License-Identifier: MIT
"""
Sample parser for redistrib JSON manifests
1. Downloads each archive
2. Validates SHA256 checksums
3. Extracts archives
4. Flattens into a collapsed directory structure
Forked from https://github.com/NVIDIA/build-system-archive-import-examples/blob/355e25cca11725e88984443a6a343dffeb43308a/parse_redist.py
and patched:
- avoid a dependency on the non-standard requests package (see
the http_get helper) by using urllib directly
- explicit error handling on hash mismatch
- always download, even if files exist
- remove dependence on deprecated distutils copy_tree in favor of shutil
"""
import argparse
import os.path
import hashlib
import json
import re
import shutil
import tarfile
import zipfile
import sys
import urllib.request
__version__ = "0.1.0"
ARCHIVES = {}
DOMAIN = "https://developer.download.nvidia.com"
OUTPUT = "flat"
PRODUCT = None
LABEL = None
URL = None
OS = None
ARCH = None
PLATFORM = None
COMPONENT = None
# Default actions
RETRIEVE = True
VALIDATE = True
UNROLLED = True
COLLAPSE = True
def http_get(url):
"""Fetch the contents of a URL."""
with urllib.request.urlopen(url) as f:
data = f.read()
if hasattr(f, "status"):
# For >= 3.9
status_code = f.status
else:
# Deprecated in 3.9
statuc_code = f.code
if status_code != 200:
raise IOError(" -> Failed to download: " + url)
return data
def err(msg):
"""Print error message and exit"""
print("ERROR: " + msg)
sys.exit(1)
def fetch_file(full_path, filename):
"""Download file to disk"""
print(":: Fetching: " + full_path)
download_data = http_get(full_path)
with open(filename, "wb") as file:
file.write(download_data)
print(" -> Wrote: " + filename)
def get_hash(filename):
"""Calculate SHA256 checksum for file"""
buffer_size = 65536
sha256 = hashlib.sha256()
with open(filename, "rb") as file:
while True:
chunk = file.read(buffer_size)
if not chunk:
break
sha256.update(chunk)
return sha256.hexdigest()
def check_hash(filename, checksum):
"""Compare checksum with expected"""
sha256 = get_hash(filename)
if checksum == sha256:
print(" Verified sha256sum: " + sha256)
else:
raise IOError(f"Mismatch sha256sum: Calculation={sha256}, "
f"Expectation={checksum} for {filename}")
def flatten_tree(src, dest):
"""Merge hierarchy from multiple directories"""
# Should use shutil.copytree(dirs_exist_ok=True), but that isn't available
# until Python 3.8.
def copytree(src, dst):
if not os.path.exists(dst):
os.makedirs(dst)
for item in os.listdir(src):
s = os.path.join(src, item)
d = os.path.join(dst, item)
if os.path.isdir(s):
copytree(s, d)
else:
if not os.path.exists(d):
shutil.copy2(s, d)
try:
copytree(src, dest)
finally:
shutil.rmtree(src)
def fetch_action(parent):
"""Do actions while parsing JSON"""
for component in MANIFEST.keys():
if not 'name' in MANIFEST[component]:
continue
if COMPONENT is not None and component != COMPONENT:
continue
print("\n" + MANIFEST[component]['name'] + ": " +
MANIFEST[component]['version'])
for platform in MANIFEST[component].keys():
if not platform in ARCHIVES:
ARCHIVES[platform] = []
if not isinstance(MANIFEST[component][platform], str):
if PLATFORM is not None and platform != PLATFORM:
print(" -> Skipping platform: " + platform)
continue
full_path = parent + MANIFEST[component][platform]['relative_path']
filename = os.path.basename(full_path)
ARCHIVES[platform].append(filename)
if RETRIEVE:
# Download archive
fetch_file(full_path, filename)
elif os.path.exists(filename):
print(" -> Found: " + filename)
checksum = MANIFEST[component][platform]['sha256']
if VALIDATE and os.path.exists(filename):
# Compare checksum
check_hash(filename, checksum)
def post_action():
"""Extract archives and merge directories"""
if len(ARCHIVES) == 0:
return
print("\nArchives:")
if not os.path.exists(OUTPUT):
os.makedirs(OUTPUT)
for platform in ARCHIVES:
for archive in ARCHIVES[platform]:
# Tar files
if UNROLLED and re.search(r"\.tar\.", archive):
print(":: tar: " + archive)
tarball = tarfile.open(archive)
topdir = os.path.commonprefix(tarball.getnames())
tarball.extractall()
tarball.close()
print(" -> Extracted: " + topdir + "/")
if COLLAPSE:
flatten_tree(topdir, OUTPUT + "/" + platform)
# Zip files
elif UNROLLED and re.search(r"\.zip", archive):
print(":: zip: " + archive)
with zipfile.ZipFile(archive) as zippy:
topdir = os.path.commonprefix(zippy.namelist())
zippy.extractall()
zippy.close()
print(" -> Extracted: " + topdir)
if COLLAPSE:
flatten_tree(topdir, OUTPUT + "/" + platform)
print("\nOutput: " + OUTPUT + "/")
for item in sorted(os.listdir(OUTPUT)):
if os.path.isdir(OUTPUT + "/" + item):
print(" - " + item + "/")
elif os.path.isfile(OUTPUT + "/" + item):
print(" - " + item)
# If running standalone
if __name__ == '__main__':
# Parse CLI arguments
PARSER = argparse.ArgumentParser()
# Input options
PARSER_GROUP = PARSER.add_mutually_exclusive_group(required=True)
PARSER_GROUP.add_argument('-u', '--url', dest='url', help='URL to manifest')
PARSER_GROUP.add_argument('-l',
'--label',
dest='label',
help='Release label version')
PARSER.add_argument('-p', '--product', dest='product', help='Product name')
PARSER.add_argument('-o', '--output', dest='output', help='Output directory')
# Filter options
PARSER.add_argument('--component', dest='component', help='Component name')
PARSER.add_argument('--os', dest='os', help='Operating System')
PARSER.add_argument('--arch', dest='arch', help='Architecture')
# Toggle actions
PARSER.add_argument('-w', '--download', dest='retrieve', action='store_true', \
help='Download archives', default=True)
PARSER.add_argument('-W', '--no-download', dest='retrieve', action='store_false', \
help='Parse manifest without downloads')
PARSER.add_argument('-s', '--checksum', dest='validate', action='store_true', \
help='Verify SHA256 checksum', default=True)
PARSER.add_argument('-S', '--no-checksum', dest='validate', action='store_false', \
help='Skip SHA256 checksum validation')
PARSER.add_argument('-x', '--extract', dest='unrolled', action='store_true', \
help='Extract archives', default=True)
PARSER.add_argument('-X', '--no-extract', dest='unrolled', action='store_false', \
help='Do not extract archives')
PARSER.add_argument('-f', '--flatten', dest='collapse', action='store_true', \
help='Collapse directories', default=True)
PARSER.add_argument('-F', '--no-flatten', dest='collapse', action='store_false', \
help='Do not collapse directories')
ARGS = PARSER.parse_args()
#print(ARGS)
RETRIEVE = ARGS.retrieve
VALIDATE = ARGS.validate
UNROLLED = ARGS.unrolled
COLLAPSE = ARGS.collapse
# Define variables
if ARGS.label is not None:
LABEL = ARGS.label
if ARGS.product is not None:
PRODUCT = ARGS.product
if ARGS.url is not None:
URL = ARGS.url
if ARGS.output is not None:
OUTPUT = ARGS.output
if ARGS.component is not None:
COMPONENT = ARGS.component
if ARGS.os is not None:
OS = ARGS.os
if ARGS.arch is not None:
ARCH = ARGS.arch
#
# Setup
#
# Sanity check
if not UNROLLED:
COLLAPSE = False
# Short-hand
if LABEL:
if PRODUCT:
URL = f"{DOMAIN}/compute/{PRODUCT}/redist/redistrib_{LABEL}.json"
else:
err("Must pass --product argument")
# Concatentate
if ARCH is not None and OS is not None:
PLATFORM = f"{OS}-{ARCH}"
elif ARCH is not None and OS is None:
err("Must pass --os argument")
elif OS is not None and ARCH is None:
err("Must pass --arch argument")
#
# Run
#
# Parse JSON
try:
MANIFEST = json.loads(http_get(URL))
except json.decoder.JSONDecodeError:
err("redistrib JSON manifest file not found")
print(":: Parsing JSON: " + URL)
# Do stuff
fetch_action(os.path.dirname(URL) + "/")
post_action()
### END ###