Get TF:XLA working in docker (#11483)
XLA:GPU and XLA:CPU baselines are being generated manually on a VM
instance with similar hardware to the runners. This PR adds CUDA and
cuDNN packages to the SHARK Dockerfile in order for TensorFlow to run
with XLA in the Docker container, removing the need to run benchmarks
manually to get XLA baselines.
diff --git a/.github/workflows/run_shark_tank.yml b/.github/workflows/run_shark_tank.yml
index 4c1d149..e14933e 100644
--- a/.github/workflows/run_shark_tank.yml
+++ b/.github/workflows/run_shark_tank.yml
@@ -74,7 +74,7 @@
# Only Tensorflow is working at the moment so limit benchmarking to TF models.
run: |
./build_tools/github_actions/docker_run.sh \
- gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259 \
+ gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a \
./build_tools/benchmarks/shark/run_shark.sh "${SHARK_SHA}" "cpu" "cpu" "${SHARK_OUTPUT_DIR}"
- name: "Uploading artifacts"
run: |
@@ -103,7 +103,7 @@
run: |
./build_tools/github_actions/docker_run.sh \
--gpus all \
- gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259 \
+ gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a \
./build_tools/benchmarks/shark/run_shark.sh "${SHARK_SHA}" "cuda" "cuda" "${SHARK_OUTPUT_DIR}"
- name: "Uploading artifacts"
run: |
@@ -138,7 +138,7 @@
- name: "Generating report"
run: |
./build_tools/github_actions/docker_run.sh \
- gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259 \
+ gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a \
./build_tools/benchmarks/shark/run_report.sh "${BENCHMARK_RESULTS_DIR}" \
"${BENCHMARK_RESULTS_DIR}/cpu_baseline.csv" \
"${BENCHMARK_RESULTS_DIR}/cuda_baseline.csv" \
diff --git a/build_tools/docker/dockerfiles/shark.Dockerfile b/build_tools/docker/dockerfiles/shark.Dockerfile
index ec175b2..8b4fad7 100644
--- a/build_tools/docker/dockerfiles/shark.Dockerfile
+++ b/build_tools/docker/dockerfiles/shark.Dockerfile
@@ -6,7 +6,8 @@
# An image for running SHARK tank: https://github.com/nod-ai/SHARK.
-FROM ubuntu@sha256:fd25e706f3dea2a5ff705dbc3353cf37f08307798f3e360a13e9385840f73fb3
+# Ubuntu 22.04
+FROM ubuntu@sha256:4b1d0c4a2d2aaf63b37111f34eb9fa89fa1bf53dd6e4ca954d47caebca4005c2
SHELL ["/bin/bash", "-e", "-u", "-o", "pipefail", "-c"]
@@ -29,22 +30,39 @@
######## Python ########
WORKDIR /install-python
-RUN apt-get update \
- && apt-get install -y software-properties-common \
- && add-apt-repository -y ppa:deadsnakes/ppa \
- && apt-get update \
- && apt-get install -y \
- python3.10 \
- python3.10-dev \
- && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
- && apt-get install -y \
- python3-pip \
- python3-setuptools \
- python3-distutils \
- python3-venv \
- python3.10-venv
+ARG PYTHON_VERSION=3.10
+
+COPY runtime/bindings/python/iree/runtime/build_requirements.txt build_tools/docker/context/install_python_deps.sh ./
+RUN ./install_python_deps.sh "${PYTHON_VERSION}" \
+ && rm -rf /install-python
+WORKDIR /
ENV PYTHON_BIN /usr/bin/python3
+##############
+
+######## Cuda ########
+WORKDIR /install-cuda
+
+# We need CUDA Toolkit and CuDNN in order to run the Tensorflow XLA baselines.
+ARG NVIDIA_TOOLKIT_DEB="cuda-repo-ubuntu2204-11-7-local_11.7.0-515.43.04-1_amd64.deb"
+ARG NVIDIA_CUDNN_DEB="cudnn-local-repo-ubuntu2204-8.7.0.84_1.0-1_amd64.deb"
+
+RUN wget -q "https://storage.googleapis.com/iree-shared-files/${NVIDIA_TOOLKIT_DEB}"
+RUN wget -q "https://storage.googleapis.com/iree-shared-files/${NVIDIA_CUDNN_DEB}"
+
+# Install CUDA Toolkit. Instructions from https://developer.nvidia.com/cuda-downloads.
+RUN dpkg -i "${NVIDIA_TOOLKIT_DEB}" \
+ && cp /var/cuda-repo-ubuntu2204-11-7-local/cuda-46B62B5F-keyring.gpg /usr/share/keyrings/ \
+ && apt-get update \
+ && apt-get -y install cuda-toolkit-11.7
+
+# Install CuDNN. Instructions from https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html.
+RUN dpkg -i "${NVIDIA_CUDNN_DEB}" \
+ && cp /var/cudnn-local-repo-ubuntu2204-8.7.0.84/cudnn-local-BF23AD8A-keyring.gpg /usr/share/keyrings/ \
+ && apt-get update \
+ && apt-get -y install libcudnn8 \
+ && apt-get -y install libcudnn8-dev \
+ && rm -rf /install-cuda
WORKDIR /
##############
@@ -63,13 +81,5 @@
WORKDIR /
ENV VULKAN_SDK="/opt/vulkan-sdk/${VULKAN_SDK_VERSION}/x86_64"
-
ENV PATH="${VULKAN_SDK}/bin:$PATH"
-
-# Symlink the Vulkan loader to a system library directory. This is needed to
-# allow Vulkan applications to find the Vulkan loader. It also avoids using
-# LD_LIBRARY_PATH, which is not supported well by Docker.
-RUN ln -s "${VULKAN_SDK}/lib/libvulkan.so" /usr/lib/x86_64-linux-gnu/ \
- && ln -s "${VULKAN_SDK}/lib/libvulkan.so.1" /usr/lib/x86_64-linux-gnu/
-
############## \
diff --git a/build_tools/docker/prod_digests.txt b/build_tools/docker/prod_digests.txt
index 66655ee..32a33d7 100644
--- a/build_tools/docker/prod_digests.txt
+++ b/build_tools/docker/prod_digests.txt
@@ -10,7 +10,7 @@
gcr.io/iree-oss/emscripten@sha256:20847a0a9503852d9594f3c4e7e633b913b450240b097d378dcd42d22ac0948e
gcr.io/iree-oss/android@sha256:e5f45760ab2dc8a1d72dc6e9d90056719bedac5678ee90e6d9f9b958172b4834
gcr.io/iree-oss/manylinux2014_x86_64-release@sha256:b3b096e4b96746c3ae4cc52e880000d91038e79441334a7cfce9914cbbec1312
-gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259
+gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a
gcr.io/iree-oss/base-bleeding-edge@sha256:479eefb76447c865cf58c5be7ca9fe33f48584b474a1da3dfaa125aad2510463
gcr.io/iree-oss/swiftshader-bleeding-edge@sha256:c22afc61198e14a98e06e5261149de74c629acd2bc61b82e57ec90e5461b69be
gcr.io/iree-oss/mmperf@sha256:7b3bf8a1fafe3603428820865623b133aeecb409d8f79ac7f79b4400b391e207