blob: ffa422077528513b5993ee4b1f43ebb3309cc430 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2021 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Downloads a file from the web and decompresses it if necessary. NEVER Use
this tool to download from untrusted sources, it doesn't unpack the file
safely.
"""
import argparse
import gzip
import os
import shutil
import tarfile
import urllib.request
import urllib.error
import logging
import time
DEFAULT_MAX_TRIES = 3
RETRY_COOLDOWN_TIME = 1.0
def parse_arguments():
"""Parses command line arguments."""
parser = argparse.ArgumentParser(
description="Downloads a file from the web "
"and decompresses it if necessary. NEVER Use this tool to download from "
"untrusted sources, it doesn't unpack the file safely."
)
parser.add_argument(
"source_url", type=str, metavar="<source-url>", help="Source URL to download"
)
parser.add_argument(
"-o",
"--output",
type=str,
required=True,
metavar="<output-file>",
help="Output file path",
)
parser.add_argument(
"--unpack",
action="store_true",
default=False,
help="Unpack the downloaded file if it's an archive",
)
parser.add_argument(
"--max-tries",
metavar="<max-tries>",
type=int,
default=DEFAULT_MAX_TRIES,
help="Number of tries before giving up",
)
return parser.parse_args()
def download_and_extract(source_url: str, output: str, unpack: bool):
# Open the URL and get the file-like streaming object.
with urllib.request.urlopen(source_url) as response:
if response.status != 200:
raise RuntimeError(
f"Failed to download file with status {response.status} {response.msg}"
)
if unpack:
if source_url.endswith(".tar.gz"):
# Open tar.gz in the streaming mode.
with tarfile.open(fileobj=response, mode="r|*") as tar_file:
if os.path.exists(output):
shutil.rmtree(output)
os.makedirs(output)
tar_file.extractall(output)
return
elif source_url.endswith(".gz"):
# Open gzip from a file-like object, which will be in the streaming mode.
with gzip.open(filename=response, mode="rb") as input_file:
with open(output, "wb") as output_file:
shutil.copyfileobj(input_file, output_file)
return
# Fallback to download the file only.
with open(output, "wb") as output_file:
# Streaming copy.
shutil.copyfileobj(response, output_file)
def main(args):
output_dir = os.path.dirname(args.output)
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
remaining_tries = args.max_tries
while remaining_tries > 0:
try:
download_and_extract(args.source_url, args.output, args.unpack)
break
except (ConnectionResetError, ConnectionRefusedError, urllib.error.URLError):
remaining_tries -= 1
if remaining_tries == 0:
raise
else:
logging.warning(
f"Connection error, remaining {remaining_tries} tries",
exc_info=True,
)
time.sleep(RETRY_COOLDOWN_TIME)
if __name__ == "__main__":
main(parse_arguments())