Get TF:XLA working in docker (#11483)
XLA:GPU and XLA:CPU baselines are being generated manually on a VM
instance with similar hardware to the runners. This PR adds CUDA and
cuDNN packages to the SHARK Dockerfile in order for TensorFlow to run
with XLA in the Docker container, removing the need to run benchmarks
manually to get XLA baselines.
diff --git a/.github/workflows/run_shark_tank.yml b/.github/workflows/run_shark_tank.yml
index 4c1d149..e14933e 100644
--- a/.github/workflows/run_shark_tank.yml
+++ b/.github/workflows/run_shark_tank.yml
@@ -74,7 +74,7 @@
# Only Tensorflow is working at the moment so limit benchmarking to TF models.
run: |
./build_tools/github_actions/docker_run.sh \
- gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259 \
+ gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a \
./build_tools/benchmarks/shark/run_shark.sh "${SHARK_SHA}" "cpu" "cpu" "${SHARK_OUTPUT_DIR}"
- name: "Uploading artifacts"
run: |
@@ -103,7 +103,7 @@
run: |
./build_tools/github_actions/docker_run.sh \
--gpus all \
- gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259 \
+ gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a \
./build_tools/benchmarks/shark/run_shark.sh "${SHARK_SHA}" "cuda" "cuda" "${SHARK_OUTPUT_DIR}"
- name: "Uploading artifacts"
run: |
@@ -138,7 +138,7 @@
- name: "Generating report"
run: |
./build_tools/github_actions/docker_run.sh \
- gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259 \
+ gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a \
./build_tools/benchmarks/shark/run_report.sh "${BENCHMARK_RESULTS_DIR}" \
"${BENCHMARK_RESULTS_DIR}/cpu_baseline.csv" \
"${BENCHMARK_RESULTS_DIR}/cuda_baseline.csv" \
diff --git a/build_tools/docker/dockerfiles/shark.Dockerfile b/build_tools/docker/dockerfiles/shark.Dockerfile
index ec175b2..8b4fad7 100644
--- a/build_tools/docker/dockerfiles/shark.Dockerfile
+++ b/build_tools/docker/dockerfiles/shark.Dockerfile
@@ -6,7 +6,8 @@
# An image for running SHARK tank: https://github.com/nod-ai/SHARK.
-FROM ubuntu@sha256:fd25e706f3dea2a5ff705dbc3353cf37f08307798f3e360a13e9385840f73fb3
+# Ubuntu 22.04
+FROM ubuntu@sha256:4b1d0c4a2d2aaf63b37111f34eb9fa89fa1bf53dd6e4ca954d47caebca4005c2
SHELL ["/bin/bash", "-e", "-u", "-o", "pipefail", "-c"]
@@ -29,22 +30,39 @@
######## Python ########
WORKDIR /install-python
-RUN apt-get update \
- && apt-get install -y software-properties-common \
- && add-apt-repository -y ppa:deadsnakes/ppa \
- && apt-get update \
- && apt-get install -y \
- python3.10 \
- python3.10-dev \
- && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
- && apt-get install -y \
- python3-pip \
- python3-setuptools \
- python3-distutils \
- python3-venv \
- python3.10-venv
+ARG PYTHON_VERSION=3.10
+
+COPY runtime/bindings/python/iree/runtime/build_requirements.txt build_tools/docker/context/install_python_deps.sh ./
+RUN ./install_python_deps.sh "${PYTHON_VERSION}" \
+ && rm -rf /install-python
+WORKDIR /
ENV PYTHON_BIN /usr/bin/python3
+##############
+
+######## Cuda ########
+WORKDIR /install-cuda
+
+# We need CUDA Toolkit and CuDNN in order to run the Tensorflow XLA baselines.
+ARG NVIDIA_TOOLKIT_DEB="cuda-repo-ubuntu2204-11-7-local_11.7.0-515.43.04-1_amd64.deb"
+ARG NVIDIA_CUDNN_DEB="cudnn-local-repo-ubuntu2204-8.7.0.84_1.0-1_amd64.deb"
+
+RUN wget -q "https://storage.googleapis.com/iree-shared-files/${NVIDIA_TOOLKIT_DEB}"
+RUN wget -q "https://storage.googleapis.com/iree-shared-files/${NVIDIA_CUDNN_DEB}"
+
+# Install CUDA Toolkit. Instructions from https://developer.nvidia.com/cuda-downloads.
+RUN dpkg -i "${NVIDIA_TOOLKIT_DEB}" \
+ && cp /var/cuda-repo-ubuntu2204-11-7-local/cuda-46B62B5F-keyring.gpg /usr/share/keyrings/ \
+ && apt-get update \
+ && apt-get -y install cuda-toolkit-11.7
+
+# Install CuDNN. Instructions from https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html.
+RUN dpkg -i "${NVIDIA_CUDNN_DEB}" \
+ && cp /var/cudnn-local-repo-ubuntu2204-8.7.0.84/cudnn-local-BF23AD8A-keyring.gpg /usr/share/keyrings/ \
+ && apt-get update \
+ && apt-get -y install libcudnn8 \
+ && apt-get -y install libcudnn8-dev \
+ && rm -rf /install-cuda
WORKDIR /
##############
@@ -63,13 +81,5 @@
WORKDIR /
ENV VULKAN_SDK="/opt/vulkan-sdk/${VULKAN_SDK_VERSION}/x86_64"
-
ENV PATH="${VULKAN_SDK}/bin:$PATH"
-
-# Symlink the Vulkan loader to a system library directory. This is needed to
-# allow Vulkan applications to find the Vulkan loader. It also avoids using
-# LD_LIBRARY_PATH, which is not supported well by Docker.
-RUN ln -s "${VULKAN_SDK}/lib/libvulkan.so" /usr/lib/x86_64-linux-gnu/ \
- && ln -s "${VULKAN_SDK}/lib/libvulkan.so.1" /usr/lib/x86_64-linux-gnu/
-
############## \
diff --git a/build_tools/docker/prod_digests.txt b/build_tools/docker/prod_digests.txt
index 66655ee..32a33d7 100644
--- a/build_tools/docker/prod_digests.txt
+++ b/build_tools/docker/prod_digests.txt
@@ -10,7 +10,7 @@
gcr.io/iree-oss/emscripten@sha256:20847a0a9503852d9594f3c4e7e633b913b450240b097d378dcd42d22ac0948e
gcr.io/iree-oss/android@sha256:e5f45760ab2dc8a1d72dc6e9d90056719bedac5678ee90e6d9f9b958172b4834
gcr.io/iree-oss/manylinux2014_x86_64-release@sha256:b3b096e4b96746c3ae4cc52e880000d91038e79441334a7cfce9914cbbec1312
-gcr.io/iree-oss/shark@sha256:c72ef54dcb6ec485e8a96b0dfc43307875f4c4c7619f7fdc60bf5220a5672259
+gcr.io/iree-oss/shark@sha256:764dd0af58955eaa46f785bac84c9dbc0f4301fa2cd2fc84c1a23996090b539a
gcr.io/iree-oss/base-bleeding-edge@sha256:479eefb76447c865cf58c5be7ca9fe33f48584b474a1da3dfaa125aad2510463
gcr.io/iree-oss/swiftshader-bleeding-edge@sha256:c22afc61198e14a98e06e5261149de74c629acd2bc61b82e57ec90e5461b69be
gcr.io/iree-oss/mmperf@sha256:7b3bf8a1fafe3603428820865623b133aeecb409d8f79ac7f79b4400b391e207