Extract and improve script to download CUDA toolkit components.

Previously, we fetched a sample script from an NVIDIA GitHub repo and used CMake scripting to drive it to download an appropriate SDK. This patch:

* Forks the parse_redist.py sample locally into third_party/nvidia_sdk_download.
* Fixes a number of things in parse_redist.py to make it more robust, remove warnings, and eliminate the dependency on the Python requests package.
* Removes the 'requests' package from all requirements files as it is no longer needed.
* Adds fetch_cuda_toolkit.py, which reproduces the behavior that was previously open-coded in CMake scripting.
* Updates the build_tools/third_party/cuda/CMakeLists.txt to use the new script instead of its internal approach.

In a follow-on, I will use this script on the Bazel side to make it auto-fetch the CUDA SDK as needed as well.
diff --git a/build_tools/third_party/cuda/CMakeLists.txt b/build_tools/third_party/cuda/CMakeLists.txt
index 87b986d..bca76d6 100644
--- a/build_tools/third_party/cuda/CMakeLists.txt
+++ b/build_tools/third_party/cuda/CMakeLists.txt
@@ -5,79 +5,18 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 function(fetch_cuda_toolkit)
-  # Parameters to the download script.
-  # Look for an appropriate redistrib_*.json here to verify:
-  #   https://developer.download.nvidia.com/compute/cuda/redist/
-  set(_VERSION "11.6.2")
-  set(_PRODUCT "cuda")
-  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
-    set(_OS "linux")
-  elseif(WIN32)
-    set(_OS "windows")
-  else()
-    message(SEND_ERROR "Unsupported OS environment. Must be Windows or Linux.")
-    return()
+  # The script prints the resulting toolkit directory on stdout (captured below).
+  set(_DOWNLOAD_SCRIPT_PATH "${IREE_SOURCE_DIR}/third_party/nvidia_sdk_download/fetch_cuda_toolkit.py")
+  message(STATUS "Checking and downloading CUDA SDK toolkit components")
+  execute_process(COMMAND "${Python3_EXECUTABLE}"
+    "${_DOWNLOAD_SCRIPT_PATH}" "${CMAKE_CURRENT_BINARY_DIR}"
+    RESULT_VARIABLE _EXEC_RESULT
+    OUTPUT_VARIABLE _ACTUAL_DOWNLOAD_PATH
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(_EXEC_RESULT AND NOT _EXEC_RESULT EQUAL 0)
+    message(FATAL_ERROR "Error fetching CUDA toolkit (result: ${_EXEC_RESULT})")
   endif()
-  # CUDA is only supported on Linux/Windows where x64 is the only arch for now.
-  # Note: CMAKE_HOST_SYSTEM_PROCESSOR may be AMD64 on Windows, but we still
-  # want to use `x86_64` here.
-  set(_ARCH "x86_64")
-
-  set(_TARGET_DIR "${CMAKE_CURRENT_BINARY_DIR}/${_VERSION}")
-  set(_DOWNLOAD_SCRIPT_URL "https://raw.githubusercontent.com/NVIDIA/build-system-archive-import-examples/44dfb51fad75a8a2f1044a4fe221aba70571b86f/parse_redist.py")
-  set(_DOWNLOAD_SCRIPT_PATH "${_TARGET_DIR}/parse_redist.py")
-
-  # Only download if haven't already.
-  # This will produce a unified directory tree under:
-  #   flat/$OS-$ARCH
-  set(_ARCH_DIR "${_TARGET_DIR}/${_OS}-${_ARCH}")
-  set(_TOUCH_FILE "${_TARGET_DIR}/cuda_toolkit.downloaded")
-
-  if(NOT EXISTS "${_TOUCH_FILE}")
-    # The parse_redist.py script requires the Python requests module, which
-    # is not yet installed by default. Check for it.
-    execute_process(
-      COMMAND ${Python3_EXECUTABLE} -c "import requests"
-      RESULT_VARIABLE _PY_MODULES_EXIST_CODE
-      OUTPUT_QUIET
-    )
-    if(NOT ${_PY_MODULES_EXIST_CODE} EQUAL 0)
-      message(SEND_ERROR "CUDA auto-download requires Python packages that do not exist on your system. Recommend running: \n  ${Python3_EXECUTABLE} -m pip install requests")
-      return()
-    endif()
-
-    # Components that we need to fetch.
-    set(_COMPONENTS_TO_FETCH "")
-    list(APPEND _COMPONENTS_TO_FETCH "cuda_nvcc")
-    list(APPEND _COMPONENTS_TO_FETCH "cuda_cudart")
-
-    message(STATUS "Extracting CUDA Toolkit to ${_TARGET_DIR}")
-    file(MAKE_DIRECTORY ${_TARGET_DIR})
-
-    # First fetch the download script to its own directory.
-    file(DOWNLOAD ${_DOWNLOAD_SCRIPT_URL} ${_DOWNLOAD_SCRIPT_PATH})
-
-    # Then use the download script to fetch and flatten each component we want
-    # into the target dir.
-    foreach(COMPONENT ${_COMPONENTS_TO_FETCH})
-      message(STATUS "Downloading component ${COMPONENT}")
-      execute_process(COMMAND ${Python3_EXECUTABLE} "${_DOWNLOAD_SCRIPT_PATH}"
-        --label "${_VERSION}"
-        --product "${_PRODUCT}"
-        --os "${_OS}"
-        --arch "${_ARCH}"
-        --component "${COMPONENT}"
-        --output "${_TARGET_DIR}")
-    endforeach()
-  endif()
-
-  if(NOT EXISTS "${_ARCH_DIR}")
-    message(FATAL_ERROR "Download did not produce expected source dir: ${_ARCH_DIR}")
-    return()
-  endif()
-
-  file(TOUCH "${_TOUCH_FILE}")
-  set(CUDAToolkit_ROOT "${_ARCH_DIR}" PARENT_SCOPE)
+  set(CUDAToolkit_ROOT "${_ACTUAL_DOWNLOAD_PATH}" PARENT_SCOPE)
 endfunction()
 
 if(DEFINED ENV{IREE_CUDA_DEPS_DIR})
diff --git a/compiler/pyproject.toml b/compiler/pyproject.toml
index a56208f..ecff85c 100644
--- a/compiler/pyproject.toml
+++ b/compiler/pyproject.toml
@@ -9,6 +9,5 @@
     "packaging",
     "pybind11>=2.10.1",
     "PyYAML",
-    "requests",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/runtime/pyproject.toml b/runtime/pyproject.toml
index 0c3625f..4ff00b6 100644
--- a/runtime/pyproject.toml
+++ b/runtime/pyproject.toml
@@ -7,6 +7,5 @@
     "packaging",
     "pybind11>=2.10.1",
     "PyYAML",
-    "requests",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/third_party/nvidia_sdk_download/fetch_cuda_toolkit.py b/third_party/nvidia_sdk_download/fetch_cuda_toolkit.py
new file mode 100755
index 0000000..05aedde
--- /dev/null
+++ b/third_party/nvidia_sdk_download/fetch_cuda_toolkit.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Fetches components of the CUDA toolkit that we need to build.
+
+Syntax:
+  fetch_cuda_toolkit.py {output_dir}
+
+This will download an appropriate toolkit (subset) and print the full path
+to the resulting directory (which will be a sub-directory of the output_dir).
+"""
+
+from pathlib import Path
+import platform
+import shutil
+import subprocess
+import sys
+
+VERSION = "11.6.2"
+PRODUCT = "cuda"
+COMPONENTS = ["cuda_nvcc", "cuda_cudart"]
+
+
+def main(output_dir: Path):
+  """Downloads the CUDA components and prints the toolkit dir on stdout.
+
+  Only the resulting directory goes to stdout (the caller captures it); all
+  diagnostics and child-process output are routed to stderr.
+  """
+  system = platform.system()
+  if system == "Linux":
+    os_name = "linux"
+  elif system == "Windows":
+    os_name = "windows"
+  else:
+    print("ERROR: Fetching CUDA toolkit only supported on windows and linux",
+          file=sys.stderr)
+    sys.exit(1)
+
+  # The machine name may be reported as AMD64; normalize it to x86_64.
+  arch = platform.machine()
+  if arch == "AMD64":
+    arch = "x86_64"
+
+  target_dir = output_dir / VERSION
+  arch_dir = target_dir / f"{os_name}-{arch}"
+  touch_file = arch_dir / "cuda_toolkit.downloaded"
+  if touch_file.exists():
+    print(f"Not downloading because touch file exists: {touch_file}",
+          file=sys.stderr)
+  else:
+    # Start from a clean arch dir so a partial earlier download is discarded.
+    if arch_dir.exists():
+      shutil.rmtree(arch_dir)
+    arch_dir.mkdir(parents=True, exist_ok=True)
+
+    redist_script = Path(__file__).resolve().parent / "parse_redist.py"
+    for component in COMPONENTS:
+      print(f"Downloading component {component}", file=sys.stderr)
+      # Paths are passed as str: path-like argv entries are not accepted by
+      # subprocess on Windows before Python 3.8.
+      subprocess.check_call([
+          sys.executable, str(redist_script),
+          "--label", VERSION,
+          "--product", PRODUCT,
+          "--os", os_name,
+          "--arch", arch,
+          "--component", component,
+          "--output", str(target_dir),
+      ], cwd=str(target_dir), stdout=sys.stderr)
+
+    # Touch the file to note done (only reached if every download succeeded).
+    touch_file.touch()
+
+  # Report back.
+  print(arch_dir)
+
+
+if __name__ == "__main__":
+  if len(sys.argv) != 2:
+    print("ERROR: Expected output_dir", file=sys.stderr)
+    sys.exit(1)
+  main(Path(sys.argv[1]))
diff --git a/third_party/nvidia_sdk_download/parse_redist.py b/third_party/nvidia_sdk_download/parse_redist.py
index fc9079d..020afd5 100644
--- a/third_party/nvidia_sdk_download/parse_redist.py
+++ b/third_party/nvidia_sdk_download/parse_redist.py
@@ -7,8 +7,15 @@
 2. Validates SHA256 checksums
 3. Extracts archives
 4. Flattens into a collapsed directory structure
+
+Forked from https://github.com/NVIDIA/build-system-archive-import-examples/blob/355e25cca11725e88984443a6a343dffeb43308a/parse_redist.py
+and patched:
+  - avoid a dependency on the non-standard requests package by using
+    urllib directly (see the http_get helper)
+  - explicit error handling on hash mismatch
+  - always download, even if files exist
+  - remove dependence on deprecated distutils copy_tree in favor of shutil
 """
-from distutils.dir_util import copy_tree
 import argparse
 import os.path
 import hashlib
@@ -18,7 +25,7 @@
 import tarfile
 import zipfile
 import sys
-import requests
+import urllib.request
 
 __version__ = "0.1.0"
 
@@ -40,6 +47,21 @@
 COLLAPSE = True
 
 
+def http_get(url):
+  """Fetch the contents of a URL and return the response body as bytes.
+
+  Raises IOError if the response status is not 200.
+  """
+  with urllib.request.urlopen(url) as f:
+    data = f.read()
+    # Prefer `status`; fall back to `code` (deprecated in 3.9) when the
+    # response object does not expose `status`.
+    status_code = f.status if hasattr(f, "status") else f.code
+    if status_code != 200:
+      raise IOError("  -> Failed to download: " + url)
+    return data
+
+
 def err(msg):
   """Print error message and exit"""
   print("ERROR: " + msg)
@@ -48,14 +70,11 @@
 
 def fetch_file(full_path, filename):
   """Download file to disk"""
-  download = requests.get(full_path)
-  if download.status_code != 200:
-    print("  -> Failed: " + filename)
-  else:
-    print(":: Fetching: " + full_path)
-    with open(filename, "wb") as file:
-      file.write(download.content)
-      print("  -> Wrote: " + filename)
+  print(":: Fetching: " + full_path)
+  download_data = http_get(full_path)
+  with open(filename, "wb") as file:
+    file.write(download_data)
+    print("  -> Wrote: " + filename)
 
 
 def get_hash(filename):
@@ -77,18 +96,31 @@
   if checksum == sha256:
     print("     Verified sha256sum: " + sha256)
   else:
-    print("  => Mismatch sha256sum:")
-    print("    -> Calculation: " + sha256)
-    print("    -> Expectation: " + checksum)
+    raise IOError(f"Mismatch sha256sum: Calculation={sha256}, "
+                  f"Expectation={checksum} for {filename}")
 
 
 def flatten_tree(src, dest):
   """Merge hierarchy from multiple directories"""
+
+  # Should use shutil.copytree(dirs_exist_ok=True), but that isn't available
+  # until Python 3.8.
+  def copytree(src, dst):
+    if not os.path.exists(dst):
+      os.makedirs(dst)
+    for item in os.listdir(src):
+      s = os.path.join(src, item)
+      d = os.path.join(dst, item)
+      if os.path.isdir(s):
+        copytree(s, d)
+      else:
+        if not os.path.exists(d):
+          shutil.copy2(s, d)
+
   try:
-    copy_tree(src, dest, preserve_symlinks=1, update=1, verbose=1)
-  except FileExistsError:
-    pass
-  shutil.rmtree(src)
+    copytree(src, dest)
+  finally:
+    shutil.rmtree(src)
 
 
 def fetch_action(parent):
@@ -116,7 +148,7 @@
         filename = os.path.basename(full_path)
         ARCHIVES[platform].append(filename)
 
-        if RETRIEVE and not os.path.exists(filename):
+        if RETRIEVE:
           # Download archive
           fetch_file(full_path, filename)
         elif os.path.exists(filename):
@@ -257,7 +289,7 @@
 
 # Parse JSON
 try:
-  MANIFEST = requests.get(URL).json()
+  MANIFEST = json.loads(http_get(URL))
 except json.decoder.JSONDecodeError:
   err("redistrib JSON manifest file not found")